1 /* 2 * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * Kernel Filesystem interface 38 * 39 * NOTE! 
 *	  local ipdata pointers must be reloaded on any modifying operation
 *	  to the inode as its underlying chain may have changed.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>
#include <sys/objcache.h>
#include <sys/event.h>
#include <sys/file.h>
#include <vfs/fifofs/fifo.h>

#include "hammer2.h"

static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
				int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
				int ioflag, int seqcount);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);

/*
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	/*
	 * Degenerate case: no inode attached, just recycle the vnode.
	 */
	if (ip == NULL) {
		vrecycle(vp);
		return (0);
	}

	/*
	 * Acquire the inode lock to interlock against vp updates via
	 * the inode path and file deletions and such (which can be
	 * namespace-only operations that might not hold the vnode).
	 */
	hammer2_inode_lock(ip, 0);
	if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
		hammer2_key_t lbase;
		int nblksize;

		/*
		 * If the inode has been unlinked we can throw away all
		 * buffers (dirty or not) and clean the file out.
		 *
		 * Because vrecycle() calls are not guaranteed, try to
		 * dispose of the inode as much as possible right here.
		 */
		nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
		nvtruncbuf(vp, 0, nblksize, 0, 0);

		/*
		 * Delete the file on-media.  Queue the deletion to the
		 * sideq so the syncer performs the actual destruction.
		 */
		if ((ip->flags & HAMMER2_INODE_DELETING) == 0) {
			atomic_set_int(&ip->flags, HAMMER2_INODE_DELETING);
			hammer2_inode_delayed_sideq(ip);
		}
		hammer2_inode_unlock(ip);

		/*
		 * Recycle immediately if possible
		 */
		vrecycle(vp);
	} else {
		hammer2_inode_unlock(ip);
	}
	return (0);
}

/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if (ip == NULL)
		return(0);

	pmp = ip->pmp;

	/*
	 * NOTE! We do not attempt to flush chains here, flushing is
	 *	 really fragile and could also deadlock.
	 */
	vclrisdirty(vp);

	/*
	 * The inode lock is required to disconnect it.
	 */
	hammer2_inode_lock(ip, 0);
	vp->v_data = NULL;
	ip->vp = NULL;

	/*
	 * Delete the file on-media.  This should have been handled by the
	 * inactivation.  The operation is likely still queued on the inode
	 * though so only complain if the stars don't align.
	 */
	if ((ip->flags & (HAMMER2_INODE_ISUNLINKED | HAMMER2_INODE_DELETING)) ==
	    HAMMER2_INODE_ISUNLINKED)
	{
		atomic_set_int(&ip->flags, HAMMER2_INODE_DELETING);
		hammer2_inode_delayed_sideq(ip);
		kprintf("hammer2: vp=%p ip=%p unlinked but not disposed\n",
			vp, ip);
	}
	hammer2_inode_unlock(ip);

	/*
	 * Modified inodes will already be on SIDEQ or SYNCQ, no further
	 * action is needed.
	 *
	 * We cannot safely synchronize the inode from inside the reclaim
	 * due to potentially deep locks held as-of when the reclaim occurs.
	 * Interactions and potential deadlocks abound.  We also can't do it
	 * here without desynchronizing from the related directory entries.
	 */
	hammer2_inode_drop(ip);			/* vp ref */

	/*
	 * XXX handle background sync when ip dirty, kernel will no longer
	 * notify us regarding this inode because there is no longer a
	 * vnode attached to it.
	 */

	return (0);
}

/*
 * Currently this function synchronizes the front-end inode state to the
 * backend chain topology, then flushes the inode's chain and sub-topology
 * to backend media.  This function does not flush the root topology down to
 * the inode.
 */
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;
	int error1;
	int error2;

	vp = ap->a_vp;
	ip = VTOI(vp);
	error1 = 0;

	hammer2_trans_init(ip->pmp, 0);

	/*
	 * Flush dirty buffers in the file's logical buffer cache.
	 * It is best to wait for the strategy code to commit the
	 * buffers to the device's backing buffer cache before
	 * then trying to flush the inode.
	 *
	 * This should be quick, but certain inode modifications cached
	 * entirely in the hammer2_inode structure may not trigger a
	 * buffer read until the flush so the fsync can wind up also
	 * doing scattered reads.
	 */
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
	bio_track_wait(&vp->v_track_write, 0, 0);

	/*
	 * Flush any inode changes
	 */
	hammer2_inode_lock(ip, 0);
	if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MODIFIED))
		error1 = hammer2_inode_chain_sync(ip);

	/*
	 * Flush dirty chains related to the inode.
	 *
	 * NOTE! We are not in a flush transaction.  The inode remains on
	 *	 the sideq so the filesystem syncer can synchronize it to
	 *	 the volume root.
	 */
	error2 = hammer2_inode_chain_flush(ip, HAMMER2_XOP_INODE_STOP);
	if (error2)
		error1 = error2;

	/*
	 * We may be able to clear the vnode dirty flag.  Only safe when
	 * no in-memory modifications remain and no write I/O is pending.
	 */
	if ((ip->flags & (HAMMER2_INODE_MODIFIED |
			  HAMMER2_INODE_RESIZED |
			  HAMMER2_INODE_DIRTYDATA)) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree) &&
	    !bio_track_active(&vp->v_track_write)) {
		vclrisdirty(vp);
	}
	hammer2_inode_unlock(ip);
	hammer2_trans_done(ip->pmp, 0);

	return (error1);
}

/*
 * No lock needed, just handle ip->update
 */
static
int
hammer2_vop_access(struct vop_access_args *ap)
{
	hammer2_inode_t *ip = VTOI(ap->a_vp);
	uid_t uid;
	gid_t gid;
	mode_t mode;
	uint32_t uflags;
	int error;
	int update;

retry:
	/*
	 * Lockless snapshot of the inode meta-data via the update
	 * sequence spinlock; retry if a modification raced us.
	 */
	update = spin_access_start(&ip->cluster_spin);

	/*hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);*/
	uid = hammer2_to_unix_xid(&ip->meta.uid);
	gid = hammer2_to_unix_xid(&ip->meta.gid);
	mode = ip->meta.mode;
	uflags = ip->meta.uflags;
	/*hammer2_inode_unlock(ip);*/

	if (__predict_false(spin_access_end(&ip->cluster_spin, update)))
		goto retry;

	error = vop_helper_access(ap, uid, gid, mode, uflags);

	return (error);
}

static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *ip;
	struct vnode *vp;
	struct vattr *vap;
	int update;

	vp = ap->a_vp;
	vap = ap->a_vap;

	ip = VTOI(vp);
	pmp = ip->pmp;

retry:
	update = spin_access_start(&ip->cluster_spin);

	vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
	vap->va_fileid = ip->meta.inum;
	vap->va_mode = ip->meta.mode;
	vap->va_nlink = ip->meta.nlinks;
	vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
	vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->meta.size;	/* protected by shared lock */
	vap->va_blocksize = HAMMER2_PBUFSIZE;
	vap->va_flags = ip->meta.uflags;
	hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime);
	hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime);
	/* atime is not tracked; report mtime as atime */
	hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
	vap->va_gen = 1;
	vap->va_bytes = 0;
	if (ip->meta.type == HAMMER2_OBJTYPE_DIRECTORY) {
		/*
		 * Can't really calculate directory use sans the files under
		 * it, just assume one block for now.
		 */
		vap->va_bytes += HAMMER2_INODE_BYTES;
	} else {
		vap->va_bytes = hammer2_inode_data_count(ip);
	}
	vap->va_type = hammer2_get_vtype(ip->meta.type);
	vap->va_filerev = 0;
	vap->va_uid_uuid = ip->meta.uid;
	vap->va_gid_uuid = ip->meta.gid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	if (__predict_false(spin_access_end(&ip->cluster_spin, update)))
		goto retry;

	return (0);
}

static
int
hammer2_vop_getattr_lite(struct vop_getattr_lite_args *ap)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *ip;
	struct vnode *vp;
	struct vattr_lite *lvap;
	int update;

	vp = ap->a_vp;
	lvap = ap->a_lvap;

	ip = VTOI(vp);
	pmp = ip->pmp;

retry:
	update = spin_access_start(&ip->cluster_spin);

#if 0
	vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
	vap->va_fileid = ip->meta.inum;
#endif
	lvap->va_mode = ip->meta.mode;
	lvap->va_nlink = ip->meta.nlinks;
	lvap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
	lvap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
#if 0
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
#endif
	lvap->va_size = ip->meta.size;
#if 0
	vap->va_blocksize = HAMMER2_PBUFSIZE;
#endif
	lvap->va_flags = ip->meta.uflags;
	lvap->va_type = hammer2_get_vtype(ip->meta.type);
#if 0
	vap->va_filerev = 0;
	vap->va_uid_uuid = ip->meta.uid;
	vap->va_gid_uuid = ip->meta.gid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;
#endif

	if (__predict_false(spin_access_end(&ip->cluster_spin, update)))
		goto retry;

	return (0);
}

static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;
	struct vattr *vap;
	int error;
	int kflags = 0;
	uint64_t ctime;

	vp = ap->a_vp;
	vap = ap->a_vap;
	hammer2_update_time(&ctime);

	ip = VTOI(vp);

	if (ip->pmp->ronly)
		return (EROFS);

	/*
	 * Normally disallow setattr if there is no space, unless we
	 * are in emergency mode (might be needed to chflags -R noschg
	 * files prior to removal).
	 */
	if ((ip->pmp->flags & HAMMER2_PMPF_EMERG) == 0 &&
	    hammer2_vfs_enospace(ip, 0, ap->a_cred) > 1) {
		return (ENOSPC);
	}

	hammer2_trans_init(ip->pmp, 0);
	hammer2_inode_lock(ip, 0);
	error = 0;

	if (vap->va_flags != VNOVAL) {
		uint32_t flags;

		flags = ip->meta.uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
				     hammer2_to_unix_xid(&ip->meta.uid),
				     ap->a_cred);
		if (error == 0) {
			if (ip->meta.uflags != flags) {
				hammer2_inode_modify(ip);
				hammer2_spin_lock_update(&ip->cluster_spin);
				ip->meta.uflags = flags;
				ip->meta.ctime = ctime;
				hammer2_spin_unlock_update(&ip->cluster_spin);
				kflags |= NOTE_ATTRIB;
			}
			if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ip->meta.mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer2_guid_to_uuid(&uuid_uid, cur_uid);
			hammer2_guid_to_uuid(&uuid_gid, cur_gid);
			if (bcmp(&uuid_uid, &ip->meta.uid, sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ip->meta.gid, sizeof(uuid_gid)) ||
			    ip->meta.mode != cur_mode
			) {
				hammer2_inode_modify(ip);
				hammer2_spin_lock_update(&ip->cluster_spin);
				ip->meta.uid = uuid_uid;
				ip->meta.gid = uuid_gid;
				ip->meta.mode = cur_mode;
				ip->meta.ctime = ctime;
				hammer2_spin_unlock_update(&ip->cluster_spin);
			}
			kflags |= NOTE_ATTRIB;
		}
	}

	/*
	 * Resize the file
	 */
	if (vap->va_size != VNOVAL && ip->meta.size != vap->va_size) {
		switch(vp->v_type) {
		case VREG:
			if (vap->va_size == ip->meta.size)
				break;
			if (vap->va_size < ip->meta.size) {
				hammer2_mtx_ex(&ip->truncate_lock);
				hammer2_truncate_file(ip, vap->va_size);
				hammer2_mtx_unlock(&ip->truncate_lock);
				kflags |= NOTE_WRITE;
			} else {
				hammer2_extend_file(ip, vap->va_size);
				kflags |= NOTE_WRITE | NOTE_EXTEND;
			}
			hammer2_inode_modify(ip);
			ip->meta.mtime = ctime;
			vclrflags(vp, VLASTWRITETS);
			break;
		default:
			error = EINVAL;
			goto done;
		}
	}
#if 0
	/* atime not supported */
	if (vap->va_atime.tv_sec != VNOVAL) {
		hammer2_inode_modify(ip);
		ip->meta.atime = hammer2_timespec_to_time(&vap->va_atime);
		kflags |= NOTE_ATTRIB;
	}
#endif
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ip->meta.mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0) {
			hammer2_inode_modify(ip);
			hammer2_spin_lock_update(&ip->cluster_spin);
			ip->meta.mode = cur_mode;
			ip->meta.ctime = ctime;
			hammer2_spin_unlock_update(&ip->cluster_spin);
			kflags |= NOTE_ATTRIB;
		}
	}

	if (vap->va_mtime.tv_sec != VNOVAL) {
		hammer2_inode_modify(ip);
		ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime);
		kflags |= NOTE_ATTRIB;
		vclrflags(vp, VLASTWRITETS);
	}

done:
	/*
	 * If a truncation occurred we must call chain_sync() now in order
	 * to trim the related data chains, otherwise a later expansion can
	 * cause havoc.
	 *
	 * If an extend occurred that changed the DIRECTDATA state, we must
	 * call inode_chain_sync now in order to prepare the inode's indirect
	 * block table.
	 *
	 * WARNING! This means we are making an adjustment to the inode's
	 * chain outside of sync/fsync, and not just to inode->meta, which
	 * may result in some consistency issues if a crash were to occur
	 * at just the wrong time.
	 */
	if (ip->flags & HAMMER2_INODE_RESIZED)
		hammer2_inode_chain_sync(ip);

	/*
	 * Cleanup.
	 */
	hammer2_inode_unlock(ip);
	hammer2_trans_done(ip->pmp, HAMMER2_TRANS_SIDEQ);
	hammer2_knote(ip->vp, kflags);

	return (error);
}

static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	hammer2_xop_readdir_t *xop;
	hammer2_blockref_t bref;
	hammer2_inode_t *ip;
	hammer2_tid_t inum;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int ncookies;
	int error;
	int eofflag;
	int r;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;
	eofflag = 0;
	error = 0;

	/*
	 * Setup directory entry cookies if requested
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	if (saveoff == 0) {
		inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
		if (ip != ip->pmp->iroot)
			inum = ip->meta.iparent & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: lkey %016jx\n", lkey);
	if (error)
		goto done;

	/*
	 * Use the XOP interface to scan the real directory entries.
	 */
	xop = hammer2_xop_alloc(ip, 0);
	xop->lkey = lkey;
	hammer2_xop_start(&xop->head, &hammer2_readdir_desc);

	for (;;) {
		const hammer2_inode_data_t *ripdata;
		const char *dname;
		int dtype;

		error = hammer2_xop_collect(&xop->head, 0);
		error = hammer2_error_to_errno(error);
		if (error) {
			break;
		}
		if (cookie_index == ncookies)
			break;
		if (hammer2_debug & 0x0020)
			kprintf("cluster chain %p %p\n",
				xop->head.cluster.focus,
				(xop->head.cluster.focus ?
				 xop->head.cluster.focus->data : (void *)-1));
		hammer2_cluster_bref(&xop->head.cluster, &bref);

		if (bref.type == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_xop_gdata(&xop->head)->ipdata;
			dtype = hammer2_get_dtype(ripdata->meta.type);
			saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
			r = vop_write_dirent(&error, uio,
					     ripdata->meta.inum &
					      HAMMER2_DIRHASH_USERMSK,
					     dtype,
					     ripdata->meta.name_len,
					     ripdata->filename);
			hammer2_xop_pdata(&xop->head);
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else if (bref.type == HAMMER2_BREF_TYPE_DIRENT) {
			uint16_t namlen;

			dtype = hammer2_get_dtype(bref.embed.dirent.type);
			saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
			namlen = bref.embed.dirent.namlen;
			/*
			 * Short names are embedded in the bref check area,
			 * long names require the auxiliary data buffer.
			 */
			if (namlen <= sizeof(bref.check.buf)) {
				dname = bref.check.buf;
			} else {
				dname = hammer2_xop_gdata(&xop->head)->buf;
			}
			r = vop_write_dirent(&error, uio,
					     bref.embed.dirent.inum, dtype,
					     namlen, dname);
			if (namlen > sizeof(bref.check.buf))
				hammer2_xop_pdata(&xop->head);
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else {
			/* XXX chain error */
			kprintf("bad chain type readdir %d\n", bref.type);
		}
	}
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	if (error == ENOENT) {
		error = 0;
		eofflag = 1;
		saveoff = (hammer2_key_t)-1;
	} else {
		/*
		 * NOTE(review): if the very first xop_collect fails with a
		 * non-ENOENT error, bref has not been filled in yet and this
		 * reads an uninitialized blockref -- TODO confirm upstream.
		 */
		saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
	}
done:
	hammer2_inode_unlock(ip);
	if (ap->a_eofflag)
		*ap->a_eofflag = eofflag;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: done at %016jx\n", saveoff);
	uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
	if (error && cookie_index == 0) {
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	return (error);
}

/*
 * hammer2_vop_readlink { vp, uio, cred }
 */
static
int
hammer2_vop_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp;
	hammer2_inode_t *ip;
	int error;

	vp = ap->a_vp;
	if (vp->v_type != VLNK)
		return (EINVAL);
	ip = VTOI(vp);

	error = hammer2_read_file(ip, ap->a_uio, 0);
	return (error);
}

static
int
hammer2_vop_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	hammer2_inode_t *ip;
	struct uio *uio;
	int error;
	int seqcount;

	/*
	 * Read operations supported on this vnode?
	 */
	vp = ap->a_vp;
	if (vp->v_type == VDIR)
		return (EISDIR);
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Misc
	 */
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

	error = hammer2_read_file(ip, uio, seqcount);
	return (error);
}

static
int
hammer2_vop_write(struct vop_write_args *ap)
{
	hammer2_inode_t *ip;
	thread_t td;
	struct vnode *vp;
	struct uio *uio;
	int error;
	int seqcount;
	int ioflag;

	/*
	 * Write operations supported on this vnode?
	 */
	vp = ap->a_vp;
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Misc
	 */
	ip = VTOI(vp);
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	error = 0;
	if (ip->pmp->ronly || (ip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	switch (hammer2_vfs_enospace(ip, uio->uio_resid, ap->a_cred)) {
	case 2:
		return (ENOSPC);
	case 1:
		ioflag |= IO_DIRECT;	/* semi-synchronous */
		/* fall through */
	default:
		break;
	}

	seqcount = ioflag >> IO_SEQSHIFT;

	/*
	 * Check resource limit
	 */
	if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
	    uio->uio_offset + uio->uio_resid >
	     td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * The transaction interlocks against flush initiations
	 * (note: but will run concurrently with the actual flush).
	 *
	 * To avoid deadlocking against the VM system, we must flag any
	 * transaction related to the buffer cache or other direct
	 * VM page manipulation.
	 */
	if (uio->uio_segflg == UIO_NOCOPY) {
		hammer2_trans_init(ip->pmp, HAMMER2_TRANS_BUFCACHE);
	} else {
		hammer2_trans_init(ip->pmp, 0);
	}
	error = hammer2_write_file(ip, uio, ioflag, seqcount);
	if (uio->uio_segflg == UIO_NOCOPY)
		hammer2_trans_done(ip->pmp, HAMMER2_TRANS_BUFCACHE |
					    HAMMER2_TRANS_SIDEQ);
	else
		hammer2_trans_done(ip->pmp, HAMMER2_TRANS_SIDEQ);

	return (error);
}

/*
 * Perform read operations on a file or symlink given an UNLOCKED
 * inode and uio.
 *
 * The passed ip is not locked.
 */
static
int
hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
{
	hammer2_off_t size;
	struct buf *bp;
	int error;

	error = 0;

	/*
	 * UIO read loop.
	 *
	 * WARNING! Assumes that the kernel interlocks size changes at the
	 *	    vnode level.
	 */
	hammer2_mtx_sh(&ip->lock);
	hammer2_mtx_sh(&ip->truncate_lock);
	size = ip->meta.size;
	hammer2_mtx_unlock(&ip->lock);

	while (uio->uio_resid > 0 && uio->uio_offset < size) {
		hammer2_key_t lbase;
		hammer2_key_t leof;
		int lblksize;
		int loff;
		int n;

		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, &leof);

#if 1
		bp = NULL;
		error = cluster_readx(ip->vp, leof, lbase, lblksize,
				      B_NOTMETA | B_KVABIO,
				      uio->uio_resid,
				      seqcount * MAXBSIZE,
				      &bp);
#else
		if (uio->uio_segflg == UIO_NOCOPY) {
			bp = getblk(ip->vp, lbase, lblksize,
				    GETBLK_BHEAVY | GETBLK_KVABIO, 0);
			if (bp->b_flags & B_CACHE) {
				int i;
				int j = 0;
				if (bp->b_xio.xio_npages != 16)
					kprintf("NPAGES BAD\n");
				for (i = 0; i < bp->b_xio.xio_npages; ++i) {
					vm_page_t m;
					m = bp->b_xio.xio_pages[i];
					if (m == NULL || m->valid == 0) {
						kprintf("bp %016jx %016jx pg %d inv",
							lbase, leof, i);
						if (m)
							kprintf("m->object %p/%p", m->object, ip->vp->v_object);
						kprintf("\n");
						j = 1;
					}
				}
				if (j)
					kprintf("b_flags %08x, b_error %d\n", bp->b_flags, bp->b_error);
			}
			bqrelse(bp);
		}
		error = bread_kvabio(ip->vp, lbase, lblksize, &bp);
#endif
		if (error) {
			brelse(bp);
			break;
		}
		bkvasync(bp);
		loff = (int)(uio->uio_offset - lbase);
		n = lblksize - loff;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > size - uio->uio_offset)
			n = (int)(size - uio->uio_offset);
		bp->b_flags |= B_AGE;
		uiomovebp(bp, (char *)bp->b_data + loff, n, uio);
		bqrelse(bp);
	}
	hammer2_mtx_unlock(&ip->truncate_lock);

	return (error);
}

/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
		   int ioflag, int seqcount)
{
	hammer2_key_t old_eof;
	hammer2_key_t new_eof;
	struct buf *bp;
	int kflags;
	int error;
	int modified;

	/*
	 * Setup if append
	 *
	 * WARNING! Assumes that the kernel interlocks size changes at the
	 *	    vnode level.
	 */
	hammer2_mtx_ex(&ip->lock);
	hammer2_mtx_sh(&ip->truncate_lock);
	if (ioflag & IO_APPEND)
		uio->uio_offset = ip->meta.size;
	old_eof = ip->meta.size;

	/*
	 * Extend the file if necessary.  If the write fails at some point
	 * we will truncate it back down to cover as much as we were able
	 * to write.
	 *
	 * Doing this now makes it easier to calculate buffer sizes in
	 * the loop.
	 */
	kflags = 0;
	error = 0;
	modified = 0;

	if (uio->uio_offset + uio->uio_resid > old_eof) {
		new_eof = uio->uio_offset + uio->uio_resid;
		modified = 1;
		hammer2_extend_file(ip, new_eof);
		kflags |= NOTE_EXTEND;
	} else {
		new_eof = old_eof;
	}
	hammer2_mtx_unlock(&ip->lock);

	/*
	 * UIO write loop
	 */
	while (uio->uio_resid > 0) {
		hammer2_key_t lbase;
		int trivial;
		int endofblk;
		int lblksize;
		int loff;
		int n;

		/*
		 * Don't allow the buffer build to blow out the buffer
		 * cache.
		 */
		if ((ioflag & IO_RECURSE) == 0)
			bwillwrite(HAMMER2_PBUFSIZE);

		/*
		 * This nominally tells us how much we can cluster and
		 * what the logical buffer size needs to be.  Currently
		 * we don't try to cluster the write and just handle one
		 * block at a time.
		 */
		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, NULL);
		loff = (int)(uio->uio_offset - lbase);

		KKASSERT(lblksize <= MAXBSIZE);

		/*
		 * Calculate bytes to copy this transfer and whether the
		 * copy completely covers the buffer or not.
		 */
		trivial = 0;
		n = lblksize - loff;
		if (n > uio->uio_resid) {
			n = uio->uio_resid;
			if (loff == lbase && uio->uio_offset + n == new_eof)
				trivial = 1;
			endofblk = 0;
		} else {
			if (loff == 0)
				trivial = 1;
			endofblk = 1;
		}
		if (lbase >= new_eof)
			trivial = 1;

		/*
		 * Get the buffer
		 */
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ip->vp, lbase, lblksize,
				    GETBLK_BHEAVY | GETBLK_KVABIO, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread_kvabio(ip->vp, lbase,
						     lblksize, &bp);
			}
		} else if (trivial) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ip->vp, lbase, lblksize,
				    GETBLK_BHEAVY | GETBLK_KVABIO, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 *
			 * (The strategy code will detect zero-fill physical
			 * blocks for this case).
			 */
			error = bread_kvabio(ip->vp, lbase, lblksize, &bp);
			if (error == 0)
				bheavy(bp);
		}

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Ok, copy the data in
		 */
		bkvasync(bp);
		error = uiomovebp(bp, bp->b_data + loff, n, uio);
		kflags |= NOTE_WRITE;
		modified = 1;
		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
		 *	    with IO_SYNC or IO_ASYNC set.  These writes
		 *	    must be handled as the pageout daemon expects.
		 *
		 * NOTE!    H2 relies on cluster_write() here because it
		 *	    cannot preallocate disk blocks at the logical
		 *	    level due to not knowing what the compression
		 *	    size will be at this time.
		 *
		 *	    We must use cluster_write() here and we depend
		 *	    on the write-behind feature to flush buffers
		 *	    appropriately.  If we let the buffer daemons do
		 *	    it the block allocations will be all over the
		 *	    map.
		 */
		if (ioflag & IO_SYNC) {
			bwrite(bp);
		} else if ((ioflag & IO_DIRECT) && endofblk) {
			bawrite(bp);
		} else if (ioflag & IO_ASYNC) {
			bawrite(bp);
		} else if (ip->vp->v_mount->mnt_flag & MNT_NOCLUSTERW) {
			bdwrite(bp);
		} else {
#if 1
			bp->b_flags |= B_CLUSTEROK;
			cluster_write(bp, new_eof, lblksize, seqcount);
#else
			bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
#endif
		}
	}

	/*
	 * Cleanup.  If we extended the file EOF but failed to write through
	 * the entire write is a failure and we have to back-up.
	 */
	if (error && new_eof != old_eof) {
		hammer2_mtx_unlock(&ip->truncate_lock);
		hammer2_mtx_ex(&ip->lock);		/* note lock order */
		hammer2_mtx_ex(&ip->truncate_lock);	/* note lock order */
		hammer2_truncate_file(ip, old_eof);
		if (ip->flags & HAMMER2_INODE_MODIFIED)
			hammer2_inode_chain_sync(ip);
		hammer2_mtx_unlock(&ip->lock);
	} else if (modified) {
		struct vnode *vp = ip->vp;

		hammer2_mtx_ex(&ip->lock);
		hammer2_inode_modify(ip);
		if (uio->uio_segflg == UIO_NOCOPY) {
			if (vp->v_flag & VLASTWRITETS) {
				ip->meta.mtime =
				    (unsigned long)vp->v_lastwrite_ts.tv_sec *
				    1000000 +
				    vp->v_lastwrite_ts.tv_nsec / 1000;
			}
		} else {
			hammer2_update_time(&ip->meta.mtime);
			vclrflags(vp, VLASTWRITETS);
		}

#if 0
		/*
		 * REMOVED - handled by hammer2_extend_file().  Do not issue
		 * a chain_sync() outside of a sync/fsync except for DIRECTDATA
		 * state changes.
		 *
		 * Under normal conditions we only issue a chain_sync if
		 * the inode's DIRECTDATA state changed.
		 */
		if (ip->flags & HAMMER2_INODE_RESIZED)
			hammer2_inode_chain_sync(ip);
#endif
		hammer2_mtx_unlock(&ip->lock);
		hammer2_knote(ip->vp, kflags);
	}
	hammer2_trans_assert_strategy(ip->pmp);
	hammer2_mtx_unlock(&ip->truncate_lock);

	return error;
}

/*
 * Truncate the size of a file.  The inode must be locked.
 *
 * We must unconditionally set HAMMER2_INODE_RESIZED to properly
 * ensure that any on-media data beyond the new file EOF has been destroyed.
 *
 * WARNING: nvtruncbuf() can only be safely called without the inode lock
 *	    held due to the way our write thread works.  If the truncation
 *	    occurs in the middle of a buffer, nvtruncbuf() is responsible
 *	    for dirtying that buffer and zeroing out trailing bytes.
 *
 * WARNING!
Assumes that the kernel interlocks size changes at the
 *	    vnode level.
 *
 * WARNING! Caller assumes responsibility for removing dead blocks
 *	    if INODE_RESIZED is set.
 */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	int nblksize;

	/*
	 * Temporarily drop the inode lock around nvtruncbuf(); see the
	 * WARNING above — nvtruncbuf() must run without ip->lock held.
	 */
	hammer2_mtx_unlock(&ip->lock);
	if (ip->vp) {
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvtruncbuf(ip->vp, nsize,
			   nblksize, (int)nsize & (nblksize - 1),
			   0);
	}
	hammer2_mtx_ex(&ip->lock);
	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
	ip->osize = ip->meta.size;
	ip->meta.size = nsize;
	/* unconditionally flag the resize; caller destroys dead blocks */
	atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
	hammer2_inode_modify(ip);
}

/*
 * Extend the size of a file.  The inode must be locked.
 *
 * Even though the file size is changing, we do not have to set the
 * INODE_RESIZED bit unless the file size crosses the EMBEDDED_BYTES
 * boundary.  When this occurs a hammer2_inode_chain_sync() is required
 * to prepare the inode cluster's indirect block table, otherwise
 * async execution of the strategy code will implode on us.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *	    vnode level.
 *
 * WARNING! Caller assumes responsibility for transitioning out
 *	    of the inode DIRECTDATA mode if INODE_RESIZED is set.
 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	hammer2_key_t osize;
	int oblksize;
	int nblksize;
	int error;

	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
	hammer2_inode_modify(ip);
	osize = ip->meta.size;
	ip->osize = osize;
	ip->meta.size = nsize;

	/*
	 * We must issue a chain_sync() when the DIRECTDATA state changes
	 * to prevent confusion between the flush code and the in-memory
	 * state.  This is not perfect because we are doing it outside of
	 * a sync/fsync operation, so it might not be fully synchronized
	 * with the meta-data topology flush.
	 *
	 * We must retain and re-dirty the buffer cache buffer containing
	 * the direct data so it can be written to a real block.  It should
	 * not be possible for a bread error to occur since the original data
	 * is extracted from the inode structure directly.
	 */
	if (osize <= HAMMER2_EMBEDDED_BYTES && nsize > HAMMER2_EMBEDDED_BYTES) {
		if (osize) {
			struct buf *bp;

			oblksize = hammer2_calc_logical(ip, 0, NULL, NULL);
			error = bread_kvabio(ip->vp, 0, oblksize, &bp);
			atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
			hammer2_inode_chain_sync(ip);
			if (error == 0) {
				bheavy(bp);
				bdwrite(bp);
			} else {
				brelse(bp);
			}
		} else {
			/* no prior data; just transition DIRECTDATA state */
			atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
			hammer2_inode_chain_sync(ip);
		}
	}
	/*
	 * Drop ip->lock around nvextendbuf() (same interlock rules as
	 * the truncation path above).
	 */
	hammer2_mtx_unlock(&ip->lock);
	if (ip->vp) {
		oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvextendbuf(ip->vp,
			    osize, nsize,
			    oblksize, nblksize,
			    -1, -1, 0);
	}
	hammer2_mtx_ex(&ip->lock);
}

/*
 * Resolve ncp->nc_name in the directory (dvp) and attach the resulting
 * vnode (or NULL on ENOENT) to the namecache entry.
 */
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_xop_nresolve_t *xop;
	hammer2_inode_t *ip;
	hammer2_inode_t *dip;
	struct namecache *ncp;
	struct vnode *vp;
	int error;

	dip = VTOI(ap->a_dvp);
	xop = hammer2_xop_alloc(dip, 0);

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 */
	hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
	hammer2_xop_start(&xop->head, &hammer2_nresolve_desc);

	error = hammer2_xop_collect(&xop->head, 0);
	error = hammer2_error_to_errno(error);
	if (error) {
		ip = NULL;
	} else {
		ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
	}
	hammer2_inode_unlock(dip);

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *	 entry to NULL, otherwise we just return the error and
	 *	 leave the namecache unresolved.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *	    but chain was locked shared.  inode_unlock()
	 *	    will handle it properly.
	 */
	if (ip) {
		vp = hammer2_igetv(ip, &error);	/* error set to UNIX error */
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
		} else if (error == ENOENT) {
			cache_setvp(ap->a_nch, NULL);
		}
		hammer2_inode_unlock(ip);

		/*
		 * The vp should not be released until after we've disposed
		 * of our locks, because it might cause vop_inactive() to
		 * be called.
		 */
		if (vp)
			vrele(vp);
	} else {
		error = ENOENT;
		cache_setvp(ap->a_nch, NULL);
	}
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
		("resolve error %d/%p ap %p\n",
		 error, ap->a_nch->ncp->nc_vp, ap));

	return error;
}

/*
 * Resolve ".." of (dvp) via the inode's stored iparent inode number.
 * Returns ENOENT if no parent is recorded.
 */
static
int
hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_tid_t inum;
	int error;

	dip = VTOI(ap->a_dvp);
	inum = dip->meta.iparent;
	*ap->a_vpp = NULL;

	if (inum) {
		error = hammer2_vfs_vget(ap->a_dvp->v_mount, NULL,
					 inum, ap->a_vpp);
	} else {
		error = ENOENT;
	}
	return error;
}

/*
 * Create a new directory entry (mkdir).
 */
static
int
hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	hammer2_tid_t inum;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly || (dip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	hammer2_trans_init(dip->pmp, 0);

	inum = hammer2_trans_newinum(dip->pmp);

	/*
	 * Create the actual inode as a hidden file in the iroot, then
	 * create the directory entry.  The creation of the actual inode
	 * sets its nlinks to 1 which is the value we desire.
	 *
	 * dip must be locked before nip to avoid deadlock.
	 */
	hammer2_inode_lock(dip, 0);
	nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred,
					  inum, &error);
	if (error) {
		error = hammer2_error_to_errno(error);
	} else {
		error = hammer2_dirent_create(dip, name, name_len,
					      nip->meta.inum, nip->meta.type);
		/* returns UNIX error code */
	}
	if (error) {
		/* undo the half-created inode if dirent creation failed */
		if (nip) {
			hammer2_inode_unlink_finisher(nip, NULL);
			hammer2_inode_unlock(nip);
			nip = NULL;
		}
		*ap->a_vpp = NULL;
	} else {
		/*
		 * inode_depend() must occur before the igetv() because
		 * the igetv() can temporarily release the inode lock.
		 */
		hammer2_inode_depend(dip, nip);	/* before igetv */
		*ap->a_vpp = hammer2_igetv(nip, &error);
		hammer2_inode_unlock(nip);
	}

	/*
	 * Update dip's mtime
	 *
	 * We can use a shared inode lock and allow the meta.mtime update
	 * SMP race.  hammer2_inode_modify() is MPSAFE w/a shared lock.
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);

	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
	}
	return error;
}

/*
 * Standard vnode open; no hammer2-specific state is required.
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	return vop_stdopen(ap);
}

/*
 * hammer2_vop_advlock { vp, id, op, fl, flags }
 *
 * Advisory record locking via the generic lf_advlock() using the
 * inode's current media size as the file size.
 */
static
int
hammer2_vop_advlock(struct vop_advlock_args *ap)
{
	hammer2_inode_t *ip = VTOI(ap->a_vp);
	hammer2_off_t size;

	size = ip->meta.size;
	return (lf_advlock(ap, &ip->advlock, size));
}

static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	/* standard vnode close; nothing hammer2-specific to flush here */
	return vop_stdclose(ap);
}

/*
 * hammer2_vop_nlink { nch, dvp, vp, cred }
 *
 * Create a hardlink from (vp) to {dvp, nch}.
 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_inode_t *tdip;	/* target directory to create link in */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;
	uint64_t cmtime;

	if (ap->a_dvp->v_mount != ap->a_vp->v_mount)
		return(EXDEV);

	tdip = VTOI(ap->a_dvp);
	if (tdip->pmp->ronly || (tdip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	if (hammer2_vfs_enospace(tdip, 0, ap->a_cred) > 1)
		return (ENOSPC);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * ip represents the file being hardlinked.  The file could be a
	 * normal file or a hardlink target if it has already been hardlinked.
	 * (with the new semantics, it will almost always be a hardlink
	 * target).
	 *
	 * Bump nlinks and potentially also create or move the hardlink
	 * target in the parent directory common to (ip) and (tdip).  The
	 * consolidation code can modify ip->cluster.  The returned cluster
	 * is locked.
	 */
	ip = VTOI(ap->a_vp);
	KASSERT(ip->pmp, ("ip->pmp is NULL %p %p", ip, ip->pmp));
	hammer2_trans_init(ip->pmp, 0);

	/*
	 * Target should be an indexed inode or there's no way we will ever
	 * be able to find it!
	 */
	KKASSERT((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0);

	error = 0;

	/*
	 * Can return NULL and error == EXDEV if the common parent
	 * crosses a directory with the xlink flag set.
	 */
	hammer2_inode_lock4(tdip, ip, NULL, NULL);

	hammer2_update_time(&cmtime);

	/*
	 * Create the directory entry and bump nlinks.
	 * Also update ip's ctime.
	 */
	if (error == 0) {
		error = hammer2_dirent_create(tdip, name, name_len,
					      ip->meta.inum, ip->meta.type);
		hammer2_inode_modify(ip);
		++ip->meta.nlinks;
		ip->meta.ctime = cmtime;
	}
	if (error == 0) {
		/*
		 * Update dip's [cm]time
		 */
		hammer2_inode_modify(tdip);
		tdip->meta.mtime = cmtime;
		tdip->meta.ctime = cmtime;

		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
	hammer2_inode_unlock(ip);
	hammer2_inode_unlock(tdip);

	hammer2_trans_done(ip->pmp, HAMMER2_TRANS_SIDEQ);
	hammer2_knote(ap->a_vp, NOTE_LINK);
	hammer2_knote(ap->a_dvp, NOTE_WRITE);

	return error;
}

/*
 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer2_vop_ncreate(struct vop_ncreate_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	hammer2_tid_t inum;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly || (dip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_trans_init(dip->pmp, 0);

	inum = hammer2_trans_newinum(dip->pmp);

	/*
	 * Create the actual inode as a hidden file in the iroot, then
	 * create the directory entry.  The creation of the actual inode
	 * sets its nlinks to 1 which is the value we desire.
	 *
	 * dip must be locked before nip to avoid deadlock.
	 */
	hammer2_inode_lock(dip, 0);
	nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred,
					  inum, &error);

	if (error) {
		error = hammer2_error_to_errno(error);
	} else {
		error = hammer2_dirent_create(dip, name, name_len,
					      nip->meta.inum, nip->meta.type);
		/* returns UNIX error code */
	}
	if (error) {
		/* undo the half-created inode if dirent creation failed */
		if (nip) {
			hammer2_inode_unlink_finisher(nip, NULL);
			hammer2_inode_unlock(nip);
			nip = NULL;
		}
		*ap->a_vpp = NULL;
	} else {
		hammer2_inode_depend(dip, nip);	/* before igetv */
		*ap->a_vpp = hammer2_igetv(nip, &error);
		hammer2_inode_unlock(nip);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);

	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		hammer2_knote(ap->a_dvp, NOTE_WRITE);
	}
	return error;
}

/*
 * Make a device node (typically a fifo)
 */
static
int
hammer2_vop_nmknod(struct vop_nmknod_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	hammer2_tid_t inum;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly || (dip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_trans_init(dip->pmp, 0);

	/*
	 * Create the device inode and then create the directory entry.
	 *
	 * dip must be locked before nip to avoid deadlock.
	 */
	inum = hammer2_trans_newinum(dip->pmp);

	hammer2_inode_lock(dip, 0);
	nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred,
					  inum, &error);
	if (error == 0) {
		error = hammer2_dirent_create(dip, name, name_len,
					      nip->meta.inum, nip->meta.type);
	}
	if (error) {
		/* undo the half-created inode if dirent creation failed */
		if (nip) {
			hammer2_inode_unlink_finisher(nip, NULL);
			hammer2_inode_unlock(nip);
			nip = NULL;
		}
		*ap->a_vpp = NULL;
	} else {
		hammer2_inode_depend(dip, nip);	/* before igetv */
		*ap->a_vpp = hammer2_igetv(nip, &error);
		hammer2_inode_unlock(nip);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);

	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		hammer2_knote(ap->a_dvp, NOTE_WRITE);
	}
	return error;
}

/*
 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 */
static
int
hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	hammer2_tid_t inum;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly || (dip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_trans_init(dip->pmp, 0);

	ap->a_vap->va_type = VLNK;	/* enforce type */

	/*
	 * Create the softlink as an inode and then create the directory
	 * entry.
	 *
	 * dip must be locked before nip to avoid deadlock.
	 */
	inum = hammer2_trans_newinum(dip->pmp);

	hammer2_inode_lock(dip, 0);
	nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred,
					  inum, &error);
	if (error == 0) {
		error = hammer2_dirent_create(dip, name, name_len,
					      nip->meta.inum, nip->meta.type);
	}
	if (error) {
		if (nip) {
			hammer2_inode_unlink_finisher(nip, NULL);
			hammer2_inode_unlock(nip);
			nip = NULL;
		}
		*ap->a_vpp = NULL;
		hammer2_inode_unlock(dip);
		hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);
		return error;
	}
	hammer2_inode_depend(dip, nip);	/* before igetv */
	*ap->a_vpp = hammer2_igetv(nip, &error);

	/*
	 * Build the softlink (~like file data) and finalize the namecache.
	 */
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;

		bytes = strlen(ap->a_target);

		hammer2_inode_unlock(nip);
		bzero(&auio, sizeof(auio));
		bzero(&aiov, sizeof(aiov));
		auio.uio_iov = &aiov;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_resid = bytes;
		auio.uio_iovcnt = 1;
		auio.uio_td = curthread;
		aiov.iov_base = ap->a_target;
		aiov.iov_len = bytes;
		error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
		/* XXX a failed symlink-body write is deliberately ignored */
		error = 0;
	} else {
		hammer2_inode_unlock(nip);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		hammer2_knote(ap->a_dvp, NOTE_WRITE);
	}
	return error;
}

/*
 * hammer2_vop_nremove { nch, dvp, cred }
 */
static
int
hammer2_vop_nremove(struct vop_nremove_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct vnode *vprecycle;
	struct namecache *ncp;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);
#if 0
	/* allow removals, except user to also bulkfree */
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);
#endif

	ncp = ap->a_nch->ncp;

	/*
	 * Debug hook: stall (tsleep loop) while the protected debug inode
	 * is being targeted for deletion.
	 */
	if (hammer2_debug_inode && dip->meta.inum == hammer2_debug_inode) {
		kprintf("hammer2: attempt to delete inside debug inode: %s\n",
			ncp->nc_name);
		while (hammer2_debug_inode &&
		       dip->meta.inum == hammer2_debug_inode) {
			tsleep(&hammer2_debug_inode, 0, "h2debug", hz*5);
		}
	}

	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, 0);

	/*
	 * The unlink XOP unlinks the path from the directory and
	 * locates and returns the cluster associated with the real inode.
	 * We have to handle nlinks here on the frontend.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

	xop->isdir = 0;
	xop->dopermanent = 0;
	hammer2_xop_start(&xop->head, &hammer2_unlink_desc);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	error = hammer2_error_to_errno(error);
	vprecycle = NULL;

	if (error == 0) {
		ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			if (hammer2_debug_inode &&
			    ip->meta.inum == hammer2_debug_inode) {
				kprintf("hammer2: attempt to delete debug "
					"inode!\n");
				while (hammer2_debug_inode &&
				       ip->meta.inum == hammer2_debug_inode) {
					tsleep(&hammer2_debug_inode, 0,
					       "h2debug", hz*5);
				}
			}
			hammer2_inode_unlink_finisher(ip, &vprecycle);
			hammer2_inode_depend(dip, ip); /* after modified */
			hammer2_inode_unlock(ip);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);
	if (error == 0) {
		cache_unlink(ap->a_nch);
		hammer2_knote(ap->a_dvp, NOTE_WRITE);
	}
	if (vprecycle)
		hammer2_inode_vprecycle(vprecycle);

	return (error);
}

/*
 * hammer2_vop_nrmdir { nch, dvp, cred }
 */
static
int
hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct namecache *ncp;
	struct vnode *vprecycle;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);
#if 0
	/* allow removals, except user to also bulkfree */
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);
#endif

	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, 0);

	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
	xop->isdir = 1;
	xop->dopermanent = 0;
	hammer2_xop_start(&xop->head, &hammer2_unlink_desc);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	error = hammer2_error_to_errno(error);
	vprecycle = NULL;

	if (error == 0) {
		ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			hammer2_inode_unlink_finisher(ip, &vprecycle);
			hammer2_inode_depend(dip, ip); /* after modified */
			hammer2_inode_unlock(ip);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);
	if (error == 0) {
		cache_unlink(ap->a_nch);
		hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
	}
	if (vprecycle)
		hammer2_inode_vprecycle(vprecycle);
	return (error);
}

/*
 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 */
static
int
hammer2_vop_nrename(struct vop_nrename_args *ap)
{
	struct namecache *fncp;
	struct namecache *tncp;
	hammer2_inode_t *fdip;	/* source directory */
	hammer2_inode_t *tdip;	/* target directory */
	hammer2_inode_t *ip;	/* file being renamed */
	hammer2_inode_t *tip;	/* replaced target during rename or NULL */
	struct vnode *vprecycle;
	const uint8_t *fname;
	size_t fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;
	int update_tdip;
	int update_fdip;
	hammer2_key_t tlhc;

	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	if (fdip->pmp->ronly || (fdip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	if (hammer2_vfs_enospace(fdip, 0, ap->a_cred) > 1)
		return (ENOSPC);

	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	hammer2_trans_init(tdip->pmp, 0);

	update_tdip = 0;
	update_fdip = 0;

	ip = VTOI(fncp->nc_vp);
	hammer2_inode_ref(ip);		/* extra ref */

	/*
	 * Lookup the target name to determine if a directory entry
	 * is being overwritten.  We only hold related inode locks
	 * temporarily, the operating system is expected to protect
	 * against rename races.
	 */
	tip = tncp->nc_vp ? VTOI(tncp->nc_vp) : NULL;
	if (tip)
		hammer2_inode_ref(tip);	/* extra ref */

	/*
	 * Can return NULL and error == EXDEV if the common parent
	 * crosses a directory with the xlink flag set.
	 *
	 * For now try to avoid deadlocks with a simple pointer address
	 * test.  (tip) can be NULL.
	 */
	error = 0;
	{
		/* order locks by ascending inode pointer address */
		hammer2_inode_t *ip1 = fdip;
		hammer2_inode_t *ip2 = tdip;
		hammer2_inode_t *ip3 = ip;
		hammer2_inode_t *ip4 = tip;	/* may be NULL */

		if (fdip > tdip) {
			ip1 = tdip;
			ip2 = fdip;
		}
		if (tip && ip > tip) {
			ip3 = tip;
			ip4 = ip;
		}
		hammer2_inode_lock4(ip1, ip2, ip3, ip4);
	}

	/*
	 * Resolve the collision space for (tdip, tname, tname_len)
	 *
	 * tdip must be held exclusively locked to prevent races since
	 * multiple filenames can end up in the same collision space.
	 */
	{
		hammer2_xop_scanlhc_t *sxop;
		hammer2_tid_t lhcbase;

		tlhc = hammer2_dirhash(tname, tname_len);
		lhcbase = tlhc;
		sxop = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = tlhc;
		hammer2_xop_start(&sxop->head, &hammer2_scanlhc_desc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (tlhc != sxop->head.cluster.focus->bref.key)
				break;
			++tlhc;
		}
		error = hammer2_error_to_errno(error);
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != ENOENT)
				goto done2;
			++tlhc;
			error = 0;
		}
		/* ran out of collision space within the hash bucket */
		if ((lhcbase ^ tlhc) & ~HAMMER2_DIRHASH_LOMASK) {
			error = ENOSPC;
			goto done2;
		}
	}

	/*
	 * Ready to go, issue the rename to the backend.  Note that meta-data
	 * updates to the related inodes occur separately from the rename
	 * operation.
	 *
	 * NOTE: While it is not necessary to update ip->meta.name*, doing
	 *	 so aids catastrophic recovery and debugging.
	 */
	if (error == 0) {
		hammer2_xop_nrename_t *xop4;

		xop4 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
		xop4->lhc = tlhc;
		xop4->ip_key = ip->meta.name_key;
		hammer2_xop_setip2(&xop4->head, ip);
		hammer2_xop_setip3(&xop4->head, tdip);
		if (tip && tip->meta.type == HAMMER2_OBJTYPE_DIRECTORY)
			hammer2_xop_setip4(&xop4->head, tip);
		hammer2_xop_setname(&xop4->head, fname, fname_len);
		hammer2_xop_setname2(&xop4->head, tname, tname_len);
		hammer2_xop_start(&xop4->head, &hammer2_nrename_desc);

		error = hammer2_xop_collect(&xop4->head, 0);
		error = hammer2_error_to_errno(error);
		hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);

		if (error == ENOENT)
			error = 0;

		/*
		 * Update inode meta-data.
		 *
		 * WARNING! The in-memory inode (ip) structure does not
		 *	    maintain a copy of the inode's filename buffer.
		 */
		if (error == 0 &&
		    (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
			hammer2_inode_modify(ip);
			ip->meta.name_len = tname_len;
			ip->meta.name_key = tlhc;
		}
		if (error == 0) {
			hammer2_inode_modify(ip);
			ip->meta.iparent = tdip->meta.inum;
		}
		update_fdip = 1;
		update_tdip = 1;
	}

done2:
	/*
	 * If no error, the backend has replaced the target directory entry.
	 * We must adjust nlinks on the original replace target if it exists.
	 */
	vprecycle = NULL;
	if (error == 0 && tip) {
		hammer2_inode_unlink_finisher(tip, &vprecycle);
	}

	/*
	 * Update directory mtimes to reflect that something changed.
	 */
	if (update_fdip || update_tdip) {
		uint64_t mtime;

		hammer2_update_time(&mtime);
		if (update_fdip) {
			hammer2_inode_modify(fdip);
			fdip->meta.mtime = mtime;
		}
		if (update_tdip) {
			hammer2_inode_modify(tdip);
			tdip->meta.mtime = mtime;
		}
	}
	if (tip) {
		hammer2_inode_unlock(tip);
		hammer2_inode_drop(tip);
	}
	hammer2_inode_unlock(ip);
	hammer2_inode_unlock(tdip);
	hammer2_inode_unlock(fdip);
	hammer2_inode_drop(ip);
	hammer2_trans_done(tdip->pmp, HAMMER2_TRANS_SIDEQ);

	/*
	 * Issue the namecache update after unlocking all the internal
	 * hammer2 structures, otherwise we might deadlock.
	 *
	 * WARNING! The target namespace must be updated atomically,
	 *	    and we depend on cache_rename() to handle that for
	 *	    us.  Do not do a separate cache_unlink() because
	 *	    that leaves a small window of opportunity for other
	 *	    threads to allocate the target namespace before we
	 *	    manage to complete our rename.
	 *
	 * WARNING! cache_rename() (and cache_unlink()) will properly
	 *	    set VREF_FINALIZE on any attached vnode.  Do not
	 *	    call cache_setunresolved() manually before-hand as
	 *	    this will prevent the flag from being set later via
	 *	    cache_rename().  If VREF_FINALIZE is not properly set
	 *	    and the inode is no longer in the topology, related
	 *	    chains can remain dirty indefinitely.
	 */
	if (error == 0 && tip) {
		/*cache_unlink(ap->a_tnch); see above */
		/*cache_setunresolved(ap->a_tnch); see above */
	}
	if (error == 0) {
		cache_rename(ap->a_fnch, ap->a_tnch);
		hammer2_knote(ap->a_fdvp, NOTE_WRITE);
		hammer2_knote(ap->a_tdvp, NOTE_WRITE);
		hammer2_knote(fncp->nc_vp, NOTE_RENAME);
	}
	if (vprecycle)
		hammer2_inode_vprecycle(vprecycle);

	return (error);
}

/*
 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer2_vop_ioctl(struct vop_ioctl_args *ap)
{
	hammer2_inode_t *ip;
	int error;

	ip = VTOI(ap->a_vp);

	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
			      ap->a_fflag, ap->a_cred);
	return (error);
}

/*
 * Handle MOUNTCTL_SET_EXPORT (NFS export configuration); everything
 * else falls through to vop_stdmountctl().
 */
static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	hammer2_pfs_t *pmp;
	int rc;

	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		mp = ap->a_head.a_ops->head.vv_mount;
		pmp = MPTOPMP(mp);

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = (EINVAL);
		else
			rc = vfs_export(mp, &pmp->export,
					(const struct export_args *)ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}
	return (rc);
}

/*
 * KQFILTER
 */
static void filt_hammer2detach(struct knote *kn);
static int filt_hammer2read(struct knote *kn, long hint);
static int filt_hammer2write(struct knote *kn, long hint);
static int filt_hammer2vnode(struct knote *kn, long hint);

static struct filterops hammer2read_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2read };
static struct filterops hammer2write_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2write };
static struct filterops hammer2vnode_filtops =
	{
 FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2vnode };

/*
 * hammer2_vop_kqfilter { vp, kn }
 *
 * Attach a knote to the vnode.  Selects the filterops table matching
 * the requested filter type and registers the knote on the vnode's
 * poll-info knote list.  Unsupported filter types get EOPNOTSUPP.
 */
static
int
hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &hammer2read_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &hammer2write_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &hammer2vnode_filtops;
		break;
	default:
		return (EOPNOTSUPP);
	}

	/* Stash the vnode so the filter callbacks can recover it. */
	kn->kn_hook = (caddr_t)vp;

	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

	return(0);
}

/*
 * Detach callback shared by all three filters: remove the knote from
 * the vnode list it was inserted on in hammer2_vop_kqfilter().
 */
static void
filt_hammer2detach(struct knote *kn)
{
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}

/*
 * EVFILT_READ test: report the number of bytes available between the
 * file's current size and the descriptor's offset.
 */
static int
filt_hammer2read(struct knote *kn, long hint)
{
	struct vnode *vp = (void *)kn->kn_hook;
	hammer2_inode_t *ip = VTOI(vp);
	off_t off;

	if (hint == NOTE_REVOKE) {
		/* Vnode revoked: flag EOF/no-data and fire once. */
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
		return(1);
	}
	/* Clamp the 64-bit byte count to what kn_data can carry. */
	off = ip->meta.size - kn->kn_fp->f_offset;
	kn->kn_data = (off < INTPTR_MAX) ?
 off : INTPTR_MAX;
	if (kn->kn_sfflags & NOTE_OLDAPI)
		return(1);
	return (kn->kn_data != 0);
}


/*
 * EVFILT_WRITE test: the filesystem is always writable from the
 * event-filter perspective, so report ready with no byte count.
 */
static int
filt_hammer2write(struct knote *kn, long hint)
{
	if (hint == NOTE_REVOKE)
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
	kn->kn_data = 0;
	return (1);
}

/*
 * EVFILT_VNODE test: accumulate the hinted events the caller asked
 * for (kn_sfflags) into kn_fflags and report ready once any have
 * been seen.  NOTE_REVOKE always triggers with EOF/no-data set.
 */
static int
filt_hammer2vnode(struct knote *kn, long hint)
{
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		return (1);
	}
	return (kn->kn_fflags != 0);
}

/*
 * FIFO VOPS
 */

/*
 * hammer2_vop_markatime { vp }
 *
 * NOTE(review): no atime is actually recorded here; the routine only
 * fails with EROFS when the PFS is read-only or in emergency mode and
 * otherwise succeeds as a no-op.
 */
static
int
hammer2_vop_markatime(struct vop_markatime_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	if (ip->pmp->ronly || (ip->pmp->flags & HAMMER2_PMPF_EMERG))
		return (EROFS);
	return(0);
}

/*
 * kqfilter for fifos: try the fifofs implementation first and fall
 * back to the regular hammer2 kqfilter only if the fifo code errors
 * out (e.g. for filter types fifofs does not support -- presumably
 * EVFILT_VNODE; confirm against fifo_vnode_vops).
 */
static
int
hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	if (error)
		error = hammer2_vop_kqfilter(ap);
	return(error);
}

/*
 * VOPS vector
 *
 * Regular-file / directory vnode operations table.
 */
struct vop_ops hammer2_vnode_vops = {
	.vop_default	= vop_defaultop,
	.vop_fsync	= hammer2_vop_fsync,
	.vop_getpages	= vop_stdgetpages,
	.vop_putpages	= vop_stdputpages,
	.vop_access	= hammer2_vop_access,
	.vop_advlock	= hammer2_vop_advlock,
	.vop_close	= hammer2_vop_close,
	.vop_nlink	= hammer2_vop_nlink,
	.vop_ncreate	= hammer2_vop_ncreate,
	.vop_nsymlink	= hammer2_vop_nsymlink,
	.vop_nremove	= hammer2_vop_nremove,
	.vop_nrmdir	= hammer2_vop_nrmdir,
	.vop_nrename	= hammer2_vop_nrename,
	.vop_getattr	= hammer2_vop_getattr,
	.vop_getattr_lite = hammer2_vop_getattr_lite,
	.vop_setattr	= hammer2_vop_setattr,
	.vop_readdir	= hammer2_vop_readdir,
	.vop_readlink	= hammer2_vop_readlink,
	.vop_read	= hammer2_vop_read,
	.vop_write	= hammer2_vop_write,
	.vop_open	= hammer2_vop_open,
	.vop_inactive	= hammer2_vop_inactive,
	.vop_reclaim	= hammer2_vop_reclaim,
	.vop_nresolve	= hammer2_vop_nresolve,
	.vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
	.vop_nmkdir	= hammer2_vop_nmkdir,
	.vop_nmknod	= hammer2_vop_nmknod,
	.vop_ioctl	= hammer2_vop_ioctl,
	.vop_mountctl	= hammer2_vop_mountctl,
	.vop_bmap	= hammer2_vop_bmap,
	.vop_strategy	= hammer2_vop_strategy,
	.vop_kqfilter	= hammer2_vop_kqfilter
};

/*
 * Special-file (device node) vnode operations: direct read/write is
 * disallowed (vop_stdnoread/vop_stdnowrite); attribute and lifecycle
 * operations reuse the regular hammer2 handlers.
 */
struct vop_ops hammer2_spec_vops = {
	.vop_default =		vop_defaultop,
	.vop_fsync =		hammer2_vop_fsync,
	.vop_read =		vop_stdnoread,
	.vop_write =		vop_stdnowrite,
	.vop_access =		hammer2_vop_access,
	.vop_close =		hammer2_vop_close,
	.vop_markatime =	hammer2_vop_markatime,
	.vop_getattr =		hammer2_vop_getattr,
	.vop_inactive =		hammer2_vop_inactive,
	.vop_reclaim =		hammer2_vop_reclaim,
	.vop_setattr =		hammer2_vop_setattr
};

/*
 * FIFO vnode operations: default to fifo_vnoperate so fifofs handles
 * I/O, with hammer2 overrides for attributes, lifecycle, and a
 * combined fifo/hammer2 kqfilter.  The #if 0 entries are retained
 * upstream placeholders for fifo-specific read/write/close handlers.
 */
struct vop_ops hammer2_fifo_vops = {
	.vop_default =		fifo_vnoperate,
	.vop_fsync =		hammer2_vop_fsync,
#if 0
	.vop_read =		hammer2_vop_fiforead,
	.vop_write =		hammer2_vop_fifowrite,
#endif
	.vop_access =		hammer2_vop_access,
#if 0
	.vop_close =		hammer2_vop_fifoclose,
#endif
	.vop_markatime =	hammer2_vop_markatime,
	.vop_getattr =		hammer2_vop_getattr,
	.vop_inactive =		hammer2_vop_inactive,
	.vop_reclaim =		hammer2_vop_reclaim,
	.vop_setattr =		hammer2_vop_setattr,
	.vop_kqfilter =		hammer2_vop_fifokqfilter
};