1 /* 2 * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * Kernel Filesystem interface 38 * 39 * NOTE! 
local ipdata pointers must be reloaded on any modifying operation 40 * to the inode as its underlying chain may have changed. 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/buf.h> 48 #include <sys/proc.h> 49 #include <sys/namei.h> 50 #include <sys/mount.h> 51 #include <sys/vnode.h> 52 #include <sys/mountctl.h> 53 #include <sys/dirent.h> 54 #include <sys/uio.h> 55 #include <sys/objcache.h> 56 #include <sys/event.h> 57 #include <sys/file.h> 58 #include <vfs/fifofs/fifo.h> 59 60 #include "hammer2.h" 61 62 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 63 int seqcount); 64 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 65 int ioflag, int seqcount); 66 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize); 67 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize); 68 69 struct objcache *cache_xops; 70 71 static __inline 72 void 73 hammer2_knote(struct vnode *vp, int flags) 74 { 75 if (flags) 76 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 77 } 78 79 /* 80 * Last reference to a vnode is going away but it is still cached. 81 */ 82 static 83 int 84 hammer2_vop_inactive(struct vop_inactive_args *ap) 85 { 86 hammer2_inode_t *ip; 87 struct vnode *vp; 88 89 vp = ap->a_vp; 90 ip = VTOI(vp); 91 92 /* 93 * Degenerate case 94 */ 95 if (ip == NULL) { 96 vrecycle(vp); 97 return (0); 98 } 99 100 /* 101 * Check for deleted inodes and recycle immediately on the last 102 * release. Be sure to destroy any left-over buffer cache buffers 103 * so we do not waste time trying to flush them. 104 * 105 * Note that deleting the file block chains under the inode chain 106 * would just be a waste of energy, so don't do it. 107 * 108 * WARNING: nvtruncbuf() can only be safely called without the inode 109 * lock held due to the way our write thread works. 
110 */ 111 if (ip->flags & HAMMER2_INODE_ISUNLINKED) { 112 hammer2_key_t lbase; 113 int nblksize; 114 115 /* 116 * Detect updates to the embedded data which may be 117 * synchronized by the strategy code. Simply mark the 118 * inode modified so it gets picked up by our normal flush. 119 */ 120 nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL); 121 nvtruncbuf(vp, 0, nblksize, 0, 0); 122 vrecycle(vp); 123 } 124 return (0); 125 } 126 127 /* 128 * Reclaim a vnode so that it can be reused; after the inode is 129 * disassociated, the filesystem must manage it alone. 130 */ 131 static 132 int 133 hammer2_vop_reclaim(struct vop_reclaim_args *ap) 134 { 135 hammer2_inode_t *ip; 136 hammer2_pfs_t *pmp; 137 struct vnode *vp; 138 139 vp = ap->a_vp; 140 ip = VTOI(vp); 141 if (ip == NULL) { 142 return(0); 143 } 144 pmp = ip->pmp; 145 146 /* 147 * The final close of a deleted file or directory marks it for 148 * destruction. The DELETED flag allows the flusher to shortcut 149 * any modified blocks still unflushed (that is, just ignore them). 150 * 151 * HAMMER2 usually does not try to optimize the freemap by returning 152 * deleted blocks to it as it does not usually know how many snapshots 153 * might be referencing portions of the file/dir. 154 */ 155 vp->v_data = NULL; 156 ip->vp = NULL; 157 158 /* 159 * NOTE! We do not attempt to flush chains here, flushing is 160 * really fragile and could also deadlock. 161 */ 162 vclrisdirty(vp); 163 164 /* 165 * Modified inodes will already be on SIDEQ or SYNCQ. However, 166 * unlinked-but-open inodes may already have been synced and might 167 * still require deletion-on-reclaim. 
168 */ 169 if ((ip->flags & (HAMMER2_INODE_ISUNLINKED | 170 HAMMER2_INODE_DELETING)) == 171 HAMMER2_INODE_ISUNLINKED) { 172 hammer2_inode_lock(ip, 0); 173 if ((ip->flags & (HAMMER2_INODE_ISUNLINKED | 174 HAMMER2_INODE_DELETING)) == 175 HAMMER2_INODE_ISUNLINKED) { 176 atomic_set_int(&ip->flags, HAMMER2_INODE_DELETING); 177 hammer2_inode_delayed_sideq(ip); 178 } 179 hammer2_inode_unlock(ip); 180 } 181 182 /* 183 * Modified inodes will already be on SIDEQ or SYNCQ, no further 184 * action is needed. 185 * 186 * We cannot safely synchronize the inode from inside the reclaim 187 * due to potentially deep locks held as-of when the reclaim occurs. 188 * Interactions and potential deadlocks abound. We also can't do it 189 * here without desynchronizing from the related directory entrie(s). 190 */ 191 hammer2_inode_drop(ip); /* vp ref */ 192 193 /* 194 * XXX handle background sync when ip dirty, kernel will no longer 195 * notify us regarding this inode because there is no longer a 196 * vnode attached to it. 197 */ 198 199 return (0); 200 } 201 202 /* 203 * Currently this function synchronizes the front-end inode state to the 204 * backend chain topology, then flushes the inode's chain and sub-topology 205 * to backend media. This function does not flush the root topology down to 206 * the inode. 207 */ 208 static 209 int 210 hammer2_vop_fsync(struct vop_fsync_args *ap) 211 { 212 hammer2_inode_t *ip; 213 struct vnode *vp; 214 int error1; 215 int error2; 216 217 vp = ap->a_vp; 218 ip = VTOI(vp); 219 error1 = 0; 220 221 hammer2_trans_init(ip->pmp, 0); 222 223 /* 224 * Flush dirty buffers in the file's logical buffer cache. 225 * It is best to wait for the strategy code to commit the 226 * buffers to the device's backing buffer cache before 227 * then trying to flush the inode. 
228 * 229 * This should be quick, but certain inode modifications cached 230 * entirely in the hammer2_inode structure may not trigger a 231 * buffer read until the flush so the fsync can wind up also 232 * doing scattered reads. 233 */ 234 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 235 bio_track_wait(&vp->v_track_write, 0, 0); 236 237 /* 238 * Flush any inode changes 239 */ 240 hammer2_inode_lock(ip, 0); 241 if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MODIFIED)) 242 error1 = hammer2_inode_chain_sync(ip); 243 244 /* 245 * Flush dirty chains related to the inode. 246 * 247 * NOTE! We are not in a flush transaction. The inode remains on 248 * the sideq so the filesystem syncer can synchronize it to 249 * the volume root. 250 */ 251 error2 = hammer2_inode_chain_flush(ip, HAMMER2_XOP_INODE_STOP); 252 if (error2) 253 error1 = error2; 254 255 /* 256 * We may be able to clear the vnode dirty flag. The 257 * hammer2_pfs_moderate() code depends on this usually working. 258 */ 259 if ((ip->flags & (HAMMER2_INODE_MODIFIED | 260 HAMMER2_INODE_RESIZED | 261 HAMMER2_INODE_DIRTYDATA)) == 0 && 262 RB_EMPTY(&vp->v_rbdirty_tree) && 263 !bio_track_active(&vp->v_track_write)) { 264 vclrisdirty(vp); 265 } 266 hammer2_inode_unlock(ip); 267 hammer2_trans_done(ip->pmp, 0); 268 269 return (error1); 270 } 271 272 static 273 int 274 hammer2_vop_access(struct vop_access_args *ap) 275 { 276 hammer2_inode_t *ip = VTOI(ap->a_vp); 277 uid_t uid; 278 gid_t gid; 279 int error; 280 281 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED); 282 uid = hammer2_to_unix_xid(&ip->meta.uid); 283 gid = hammer2_to_unix_xid(&ip->meta.gid); 284 error = vop_helper_access(ap, uid, gid, ip->meta.mode, ip->meta.uflags); 285 hammer2_inode_unlock(ip); 286 287 return (error); 288 } 289 290 static 291 int 292 hammer2_vop_getattr(struct vop_getattr_args *ap) 293 { 294 hammer2_pfs_t *pmp; 295 hammer2_inode_t *ip; 296 struct vnode *vp; 297 struct vattr *vap; 298 hammer2_chain_t *chain; 299 int i; 300 301 vp = ap->a_vp; 
302 vap = ap->a_vap; 303 304 ip = VTOI(vp); 305 pmp = ip->pmp; 306 307 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED); 308 309 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 310 vap->va_fileid = ip->meta.inum; 311 vap->va_mode = ip->meta.mode; 312 vap->va_nlink = ip->meta.nlinks; 313 vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid); 314 vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid); 315 vap->va_rmajor = 0; 316 vap->va_rminor = 0; 317 vap->va_size = ip->meta.size; /* protected by shared lock */ 318 vap->va_blocksize = HAMMER2_PBUFSIZE; 319 vap->va_flags = ip->meta.uflags; 320 hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime); 321 hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime); 322 hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime); 323 vap->va_gen = 1; 324 vap->va_bytes = 0; 325 if (ip->meta.type == HAMMER2_OBJTYPE_DIRECTORY) { 326 /* 327 * Can't really calculate directory use sans the files under 328 * it, just assume one block for now. 329 */ 330 vap->va_bytes += HAMMER2_INODE_BYTES; 331 } else { 332 for (i = 0; i < ip->cluster.nchains; ++i) { 333 if ((chain = ip->cluster.array[i].chain) != NULL) { 334 if (vap->va_bytes < 335 chain->bref.embed.stats.data_count) { 336 vap->va_bytes = 337 chain->bref.embed.stats.data_count; 338 } 339 } 340 } 341 } 342 vap->va_type = hammer2_get_vtype(ip->meta.type); 343 vap->va_filerev = 0; 344 vap->va_uid_uuid = ip->meta.uid; 345 vap->va_gid_uuid = ip->meta.gid; 346 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 347 VA_FSID_UUID_VALID; 348 349 hammer2_inode_unlock(ip); 350 351 return (0); 352 } 353 354 static 355 int 356 hammer2_vop_setattr(struct vop_setattr_args *ap) 357 { 358 hammer2_inode_t *ip; 359 struct vnode *vp; 360 struct vattr *vap; 361 int error; 362 int kflags = 0; 363 uint64_t ctime; 364 365 vp = ap->a_vp; 366 vap = ap->a_vap; 367 hammer2_update_time(&ctime); 368 369 ip = VTOI(vp); 370 371 if (ip->pmp->ronly) 372 return (EROFS); 373 if (hammer2_vfs_enospace(ip, 0, ap->a_cred) > 1) 
374 return (ENOSPC); 375 376 /*hammer2_pfs_memory_wait(ip->pmp);*/ 377 hammer2_trans_init(ip->pmp, 0); 378 hammer2_inode_lock(ip, 0); 379 error = 0; 380 381 if (vap->va_flags != VNOVAL) { 382 uint32_t flags; 383 384 flags = ip->meta.uflags; 385 error = vop_helper_setattr_flags(&flags, vap->va_flags, 386 hammer2_to_unix_xid(&ip->meta.uid), 387 ap->a_cred); 388 if (error == 0) { 389 if (ip->meta.uflags != flags) { 390 hammer2_inode_modify(ip); 391 ip->meta.uflags = flags; 392 ip->meta.ctime = ctime; 393 kflags |= NOTE_ATTRIB; 394 } 395 if (ip->meta.uflags & (IMMUTABLE | APPEND)) { 396 error = 0; 397 goto done; 398 } 399 } 400 goto done; 401 } 402 if (ip->meta.uflags & (IMMUTABLE | APPEND)) { 403 error = EPERM; 404 goto done; 405 } 406 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 407 mode_t cur_mode = ip->meta.mode; 408 uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid); 409 gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid); 410 uuid_t uuid_uid; 411 uuid_t uuid_gid; 412 413 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 414 ap->a_cred, 415 &cur_uid, &cur_gid, &cur_mode); 416 if (error == 0) { 417 hammer2_guid_to_uuid(&uuid_uid, cur_uid); 418 hammer2_guid_to_uuid(&uuid_gid, cur_gid); 419 if (bcmp(&uuid_uid, &ip->meta.uid, sizeof(uuid_uid)) || 420 bcmp(&uuid_gid, &ip->meta.gid, sizeof(uuid_gid)) || 421 ip->meta.mode != cur_mode 422 ) { 423 hammer2_inode_modify(ip); 424 ip->meta.uid = uuid_uid; 425 ip->meta.gid = uuid_gid; 426 ip->meta.mode = cur_mode; 427 ip->meta.ctime = ctime; 428 } 429 kflags |= NOTE_ATTRIB; 430 } 431 } 432 433 /* 434 * Resize the file 435 */ 436 if (vap->va_size != VNOVAL && ip->meta.size != vap->va_size) { 437 switch(vp->v_type) { 438 case VREG: 439 if (vap->va_size == ip->meta.size) 440 break; 441 if (vap->va_size < ip->meta.size) { 442 hammer2_mtx_ex(&ip->truncate_lock); 443 hammer2_truncate_file(ip, vap->va_size); 444 hammer2_mtx_unlock(&ip->truncate_lock); 445 kflags |= NOTE_WRITE; 446 } else { 447 
hammer2_extend_file(ip, vap->va_size); 448 kflags |= NOTE_WRITE | NOTE_EXTEND; 449 } 450 hammer2_inode_modify(ip); 451 ip->meta.mtime = ctime; 452 vclrflags(vp, VLASTWRITETS); 453 break; 454 default: 455 error = EINVAL; 456 goto done; 457 } 458 } 459 #if 0 460 /* atime not supported */ 461 if (vap->va_atime.tv_sec != VNOVAL) { 462 hammer2_inode_modify(ip); 463 ip->meta.atime = hammer2_timespec_to_time(&vap->va_atime); 464 kflags |= NOTE_ATTRIB; 465 } 466 #endif 467 if (vap->va_mode != (mode_t)VNOVAL) { 468 mode_t cur_mode = ip->meta.mode; 469 uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid); 470 gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid); 471 472 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 473 cur_uid, cur_gid, &cur_mode); 474 if (error == 0 && ip->meta.mode != cur_mode) { 475 hammer2_inode_modify(ip); 476 ip->meta.mode = cur_mode; 477 ip->meta.ctime = ctime; 478 kflags |= NOTE_ATTRIB; 479 } 480 } 481 482 if (vap->va_mtime.tv_sec != VNOVAL) { 483 hammer2_inode_modify(ip); 484 ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime); 485 kflags |= NOTE_ATTRIB; 486 vclrflags(vp, VLASTWRITETS); 487 } 488 489 done: 490 /* 491 * If a truncation occurred we must call chain_sync() now in order 492 * to trim the related data chains, otherwise a later expansion can 493 * cause havoc. 494 * 495 * If an extend occured that changed the DIRECTDATA state, we must 496 * call inode_fsync now in order to prepare the inode's indirect 497 * block table. 498 * 499 * WARNING! This means we are making an adjustment to the inode's 500 * chain outside of sync/fsync, and not just to inode->meta, which 501 * may result in some consistency issues if a crash were to occur 502 * at just the wrong time. 503 */ 504 if (ip->flags & HAMMER2_INODE_RESIZED) 505 hammer2_inode_chain_sync(ip); 506 507 /* 508 * Cleanup. 
509 */ 510 hammer2_inode_unlock(ip); 511 hammer2_trans_done(ip->pmp, HAMMER2_TRANS_SIDEQ); 512 hammer2_knote(ip->vp, kflags); 513 514 return (error); 515 } 516 517 static 518 int 519 hammer2_vop_readdir(struct vop_readdir_args *ap) 520 { 521 hammer2_xop_readdir_t *xop; 522 hammer2_blockref_t bref; 523 hammer2_inode_t *ip; 524 hammer2_tid_t inum; 525 hammer2_key_t lkey; 526 struct uio *uio; 527 off_t *cookies; 528 off_t saveoff; 529 int cookie_index; 530 int ncookies; 531 int error; 532 int eofflag; 533 int r; 534 535 ip = VTOI(ap->a_vp); 536 uio = ap->a_uio; 537 saveoff = uio->uio_offset; 538 eofflag = 0; 539 error = 0; 540 541 /* 542 * Setup cookies directory entry cookies if requested 543 */ 544 if (ap->a_ncookies) { 545 ncookies = uio->uio_resid / 16 + 1; 546 if (ncookies > 1024) 547 ncookies = 1024; 548 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 549 } else { 550 ncookies = -1; 551 cookies = NULL; 552 } 553 cookie_index = 0; 554 555 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED); 556 557 /* 558 * Handle artificial entries. To ensure that only positive 64 bit 559 * quantities are returned to userland we always strip off bit 63. 560 * The hash code is designed such that codes 0x0000-0x7FFF are not 561 * used, allowing us to use these codes for articial entries. 562 * 563 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not 564 * allow '..' to cross the mount point into (e.g.) the super-root. 565 */ 566 if (saveoff == 0) { 567 inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK; 568 r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, "."); 569 if (r) 570 goto done; 571 if (cookies) 572 cookies[cookie_index] = saveoff; 573 ++saveoff; 574 ++cookie_index; 575 if (cookie_index == ncookies) 576 goto done; 577 } 578 579 if (saveoff == 1) { 580 /* 581 * Be careful with lockorder when accessing ".." 582 * 583 * (ip is the current dir. xip is the parent dir). 
584 */ 585 inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK; 586 if (ip != ip->pmp->iroot) 587 inum = ip->meta.iparent & HAMMER2_DIRHASH_USERMSK; 588 r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, ".."); 589 if (r) 590 goto done; 591 if (cookies) 592 cookies[cookie_index] = saveoff; 593 ++saveoff; 594 ++cookie_index; 595 if (cookie_index == ncookies) 596 goto done; 597 } 598 599 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE; 600 if (hammer2_debug & 0x0020) 601 kprintf("readdir: lkey %016jx\n", lkey); 602 if (error) 603 goto done; 604 605 /* 606 * Use XOP for cluster scan. 607 * 608 * parent is the inode cluster, already locked for us. Don't 609 * double lock shared locks as this will screw up upgrades. 610 */ 611 xop = hammer2_xop_alloc(ip, 0); 612 xop->lkey = lkey; 613 hammer2_xop_start(&xop->head, &hammer2_readdir_desc); 614 615 for (;;) { 616 const hammer2_inode_data_t *ripdata; 617 const char *dname; 618 int dtype; 619 620 error = hammer2_xop_collect(&xop->head, 0); 621 error = hammer2_error_to_errno(error); 622 if (error) { 623 break; 624 } 625 if (cookie_index == ncookies) 626 break; 627 if (hammer2_debug & 0x0020) 628 kprintf("cluster chain %p %p\n", 629 xop->head.cluster.focus, 630 (xop->head.cluster.focus ? 
631 xop->head.cluster.focus->data : (void *)-1)); 632 hammer2_cluster_bref(&xop->head.cluster, &bref); 633 634 if (bref.type == HAMMER2_BREF_TYPE_INODE) { 635 ripdata = &hammer2_xop_gdata(&xop->head)->ipdata; 636 dtype = hammer2_get_dtype(ripdata->meta.type); 637 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK; 638 r = vop_write_dirent(&error, uio, 639 ripdata->meta.inum & 640 HAMMER2_DIRHASH_USERMSK, 641 dtype, 642 ripdata->meta.name_len, 643 ripdata->filename); 644 hammer2_xop_pdata(&xop->head); 645 if (r) 646 break; 647 if (cookies) 648 cookies[cookie_index] = saveoff; 649 ++cookie_index; 650 } else if (bref.type == HAMMER2_BREF_TYPE_DIRENT) { 651 uint16_t namlen; 652 653 dtype = hammer2_get_dtype(bref.embed.dirent.type); 654 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK; 655 namlen = bref.embed.dirent.namlen; 656 if (namlen <= sizeof(bref.check.buf)) { 657 dname = bref.check.buf; 658 } else { 659 dname = hammer2_xop_gdata(&xop->head)->buf; 660 } 661 r = vop_write_dirent(&error, uio, 662 bref.embed.dirent.inum, dtype, 663 namlen, dname); 664 if (namlen > sizeof(bref.check.buf)) 665 hammer2_xop_pdata(&xop->head); 666 if (r) 667 break; 668 if (cookies) 669 cookies[cookie_index] = saveoff; 670 ++cookie_index; 671 } else { 672 /* XXX chain error */ 673 kprintf("bad chain type readdir %d\n", bref.type); 674 } 675 } 676 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP); 677 if (error == ENOENT) { 678 error = 0; 679 eofflag = 1; 680 saveoff = (hammer2_key_t)-1; 681 } else { 682 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK; 683 } 684 done: 685 hammer2_inode_unlock(ip); 686 if (ap->a_eofflag) 687 *ap->a_eofflag = eofflag; 688 if (hammer2_debug & 0x0020) 689 kprintf("readdir: done at %016jx\n", saveoff); 690 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 691 if (error && cookie_index == 0) { 692 if (cookies) { 693 kfree(cookies, M_TEMP); 694 *ap->a_ncookies = 0; 695 *ap->a_cookies = NULL; 696 } 697 } else { 698 if (cookies) { 699 *ap->a_ncookies = cookie_index; 700 
*ap->a_cookies = cookies; 701 } 702 } 703 return (error); 704 } 705 706 /* 707 * hammer2_vop_readlink { vp, uio, cred } 708 */ 709 static 710 int 711 hammer2_vop_readlink(struct vop_readlink_args *ap) 712 { 713 struct vnode *vp; 714 hammer2_inode_t *ip; 715 int error; 716 717 vp = ap->a_vp; 718 if (vp->v_type != VLNK) 719 return (EINVAL); 720 ip = VTOI(vp); 721 722 error = hammer2_read_file(ip, ap->a_uio, 0); 723 return (error); 724 } 725 726 static 727 int 728 hammer2_vop_read(struct vop_read_args *ap) 729 { 730 struct vnode *vp; 731 hammer2_inode_t *ip; 732 struct uio *uio; 733 int error; 734 int seqcount; 735 int bigread; 736 737 /* 738 * Read operations supported on this vnode? 739 */ 740 vp = ap->a_vp; 741 if (vp->v_type != VREG) 742 return (EINVAL); 743 744 /* 745 * Misc 746 */ 747 ip = VTOI(vp); 748 uio = ap->a_uio; 749 error = 0; 750 751 seqcount = ap->a_ioflag >> 16; 752 bigread = (uio->uio_resid > 100 * 1024 * 1024); 753 754 error = hammer2_read_file(ip, uio, seqcount); 755 return (error); 756 } 757 758 static 759 int 760 hammer2_vop_write(struct vop_write_args *ap) 761 { 762 hammer2_inode_t *ip; 763 thread_t td; 764 struct vnode *vp; 765 struct uio *uio; 766 int error; 767 int seqcount; 768 int ioflag; 769 770 /* 771 * Read operations supported on this vnode? 
772 */ 773 vp = ap->a_vp; 774 if (vp->v_type != VREG) 775 return (EINVAL); 776 777 /* 778 * Misc 779 */ 780 ip = VTOI(vp); 781 ioflag = ap->a_ioflag; 782 uio = ap->a_uio; 783 error = 0; 784 if (ip->pmp->ronly) 785 return (EROFS); 786 switch (hammer2_vfs_enospace(ip, uio->uio_resid, ap->a_cred)) { 787 case 2: 788 return (ENOSPC); 789 case 1: 790 ioflag |= IO_DIRECT; /* semi-synchronous */ 791 /* fall through */ 792 default: 793 break; 794 } 795 796 seqcount = ioflag >> 16; 797 798 /* 799 * Check resource limit 800 */ 801 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 802 uio->uio_offset + uio->uio_resid > 803 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 804 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 805 return (EFBIG); 806 } 807 808 /* 809 * The transaction interlocks against flush initiations 810 * (note: but will run concurrently with the actual flush). 811 * 812 * To avoid deadlocking against the VM system, we must flag any 813 * transaction related to the buffer cache or other direct 814 * VM page manipulation. 815 */ 816 if (uio->uio_segflg == UIO_NOCOPY) { 817 hammer2_trans_init(ip->pmp, HAMMER2_TRANS_BUFCACHE); 818 } else { 819 /*hammer2_pfs_memory_wait(ip->pmp);*/ 820 hammer2_trans_init(ip->pmp, 0); 821 } 822 error = hammer2_write_file(ip, uio, ioflag, seqcount); 823 if (uio->uio_segflg == UIO_NOCOPY) 824 hammer2_trans_done(ip->pmp, HAMMER2_TRANS_BUFCACHE | 825 HAMMER2_TRANS_SIDEQ); 826 else 827 hammer2_trans_done(ip->pmp, HAMMER2_TRANS_SIDEQ); 828 829 return (error); 830 } 831 832 /* 833 * Perform read operations on a file or symlink given an UNLOCKED 834 * inode and uio. 835 * 836 * The passed ip is not locked. 837 */ 838 static 839 int 840 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 841 { 842 hammer2_off_t size; 843 struct buf *bp; 844 int error; 845 846 error = 0; 847 848 /* 849 * UIO read loop. 850 * 851 * WARNING! Assumes that the kernel interlocks size changes at the 852 * vnode level. 
853 */ 854 hammer2_mtx_sh(&ip->lock); 855 hammer2_mtx_sh(&ip->truncate_lock); 856 size = ip->meta.size; 857 hammer2_mtx_unlock(&ip->lock); 858 859 while (uio->uio_resid > 0 && uio->uio_offset < size) { 860 hammer2_key_t lbase; 861 hammer2_key_t leof; 862 int lblksize; 863 int loff; 864 int n; 865 866 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 867 &lbase, &leof); 868 869 #if 1 870 bp = NULL; 871 error = cluster_readx(ip->vp, leof, lbase, lblksize, 872 B_NOTMETA | B_KVABIO, 873 uio->uio_resid, 874 seqcount * MAXBSIZE, 875 &bp); 876 #else 877 if (uio->uio_segflg == UIO_NOCOPY) { 878 bp = getblk(ip->vp, lbase, lblksize, 879 GETBLK_BHEAVY | GETBLK_KVABIO, 0); 880 if (bp->b_flags & B_CACHE) { 881 int i; 882 int j = 0; 883 if (bp->b_xio.xio_npages != 16) 884 kprintf("NPAGES BAD\n"); 885 for (i = 0; i < bp->b_xio.xio_npages; ++i) { 886 vm_page_t m; 887 m = bp->b_xio.xio_pages[i]; 888 if (m == NULL || m->valid == 0) { 889 kprintf("bp %016jx %016jx pg %d inv", 890 lbase, leof, i); 891 if (m) 892 kprintf("m->object %p/%p", m->object, ip->vp->v_object); 893 kprintf("\n"); 894 j = 1; 895 } 896 } 897 if (j) 898 kprintf("b_flags %08x, b_error %d\n", bp->b_flags, bp->b_error); 899 } 900 bqrelse(bp); 901 } 902 error = bread_kvabio(ip->vp, lbase, lblksize, &bp); 903 #endif 904 if (error) { 905 brelse(bp); 906 break; 907 } 908 bkvasync(bp); 909 loff = (int)(uio->uio_offset - lbase); 910 n = lblksize - loff; 911 if (n > uio->uio_resid) 912 n = uio->uio_resid; 913 if (n > size - uio->uio_offset) 914 n = (int)(size - uio->uio_offset); 915 bp->b_flags |= B_AGE; 916 uiomovebp(bp, (char *)bp->b_data + loff, n, uio); 917 bqrelse(bp); 918 } 919 hammer2_mtx_unlock(&ip->truncate_lock); 920 921 return (error); 922 } 923 924 /* 925 * Write to the file represented by the inode via the logical buffer cache. 926 * The inode may represent a regular file or a symlink. 927 * 928 * The inode must not be locked. 
929 */ 930 static 931 int 932 hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 933 int ioflag, int seqcount) 934 { 935 hammer2_key_t old_eof; 936 hammer2_key_t new_eof; 937 struct buf *bp; 938 int kflags; 939 int error; 940 int modified; 941 942 /* 943 * Setup if append 944 * 945 * WARNING! Assumes that the kernel interlocks size changes at the 946 * vnode level. 947 */ 948 hammer2_mtx_ex(&ip->lock); 949 hammer2_mtx_sh(&ip->truncate_lock); 950 if (ioflag & IO_APPEND) 951 uio->uio_offset = ip->meta.size; 952 old_eof = ip->meta.size; 953 954 /* 955 * Extend the file if necessary. If the write fails at some point 956 * we will truncate it back down to cover as much as we were able 957 * to write. 958 * 959 * Doing this now makes it easier to calculate buffer sizes in 960 * the loop. 961 */ 962 kflags = 0; 963 error = 0; 964 modified = 0; 965 966 if (uio->uio_offset + uio->uio_resid > old_eof) { 967 new_eof = uio->uio_offset + uio->uio_resid; 968 modified = 1; 969 hammer2_extend_file(ip, new_eof); 970 kflags |= NOTE_EXTEND; 971 } else { 972 new_eof = old_eof; 973 } 974 hammer2_mtx_unlock(&ip->lock); 975 976 /* 977 * UIO write loop 978 */ 979 while (uio->uio_resid > 0) { 980 hammer2_key_t lbase; 981 int trivial; 982 int endofblk; 983 int lblksize; 984 int loff; 985 int n; 986 987 /* 988 * Don't allow the buffer build to blow out the buffer 989 * cache. 990 */ 991 if ((ioflag & IO_RECURSE) == 0) 992 bwillwrite(HAMMER2_PBUFSIZE); 993 994 /* 995 * This nominally tells us how much we can cluster and 996 * what the logical buffer size needs to be. Currently 997 * we don't try to cluster the write and just handle one 998 * block at a time. 999 */ 1000 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 1001 &lbase, NULL); 1002 loff = (int)(uio->uio_offset - lbase); 1003 1004 KKASSERT(lblksize <= 65536); 1005 1006 /* 1007 * Calculate bytes to copy this transfer and whether the 1008 * copy completely covers the buffer or not. 
1009 */ 1010 trivial = 0; 1011 n = lblksize - loff; 1012 if (n > uio->uio_resid) { 1013 n = uio->uio_resid; 1014 if (loff == lbase && uio->uio_offset + n == new_eof) 1015 trivial = 1; 1016 endofblk = 0; 1017 } else { 1018 if (loff == 0) 1019 trivial = 1; 1020 endofblk = 1; 1021 } 1022 if (lbase >= new_eof) 1023 trivial = 1; 1024 1025 /* 1026 * Get the buffer 1027 */ 1028 if (uio->uio_segflg == UIO_NOCOPY) { 1029 /* 1030 * Issuing a write with the same data backing the 1031 * buffer. Instantiate the buffer to collect the 1032 * backing vm pages, then read-in any missing bits. 1033 * 1034 * This case is used by vop_stdputpages(). 1035 */ 1036 bp = getblk(ip->vp, lbase, lblksize, 1037 GETBLK_BHEAVY | GETBLK_KVABIO, 0); 1038 if ((bp->b_flags & B_CACHE) == 0) { 1039 bqrelse(bp); 1040 error = bread_kvabio(ip->vp, lbase, 1041 lblksize, &bp); 1042 } 1043 } else if (trivial) { 1044 /* 1045 * Even though we are entirely overwriting the buffer 1046 * we may still have to zero it out to avoid a 1047 * mmap/write visibility issue. 1048 */ 1049 bp = getblk(ip->vp, lbase, lblksize, 1050 GETBLK_BHEAVY | GETBLK_KVABIO, 0); 1051 if ((bp->b_flags & B_CACHE) == 0) 1052 vfs_bio_clrbuf(bp); 1053 } else { 1054 /* 1055 * Partial overwrite, read in any missing bits then 1056 * replace the portion being written. 1057 * 1058 * (The strategy code will detect zero-fill physical 1059 * blocks for this case). 1060 */ 1061 error = bread_kvabio(ip->vp, lbase, lblksize, &bp); 1062 if (error == 0) 1063 bheavy(bp); 1064 } 1065 1066 if (error) { 1067 brelse(bp); 1068 break; 1069 } 1070 1071 /* 1072 * Ok, copy the data in 1073 */ 1074 bkvasync(bp); 1075 error = uiomovebp(bp, bp->b_data + loff, n, uio); 1076 kflags |= NOTE_WRITE; 1077 modified = 1; 1078 if (error) { 1079 brelse(bp); 1080 break; 1081 } 1082 1083 /* 1084 * WARNING: Pageout daemon will issue UIO_NOCOPY writes 1085 * with IO_SYNC or IO_ASYNC set. These writes 1086 * must be handled as the pageout daemon expects. 1087 * 1088 * NOTE! 
H2 relies on cluster_write() here because it 1089 * cannot preallocate disk blocks at the logical 1090 * level due to not knowing what the compression 1091 * size will be at this time. 1092 * 1093 * We must use cluster_write() here and we depend 1094 * on the write-behind feature to flush buffers 1095 * appropriately. If we let the buffer daemons do 1096 * it the block allocations will be all over the 1097 * map. 1098 */ 1099 if (ioflag & IO_SYNC) { 1100 bwrite(bp); 1101 } else if ((ioflag & IO_DIRECT) && endofblk) { 1102 bawrite(bp); 1103 } else if (ioflag & IO_ASYNC) { 1104 bawrite(bp); 1105 } else if (ip->vp->v_mount->mnt_flag & MNT_NOCLUSTERW) { 1106 bdwrite(bp); 1107 } else { 1108 #if 1 1109 bp->b_flags |= B_CLUSTEROK; 1110 cluster_write(bp, new_eof, lblksize, seqcount); 1111 #else 1112 bp->b_flags |= B_CLUSTEROK; 1113 bdwrite(bp); 1114 #endif 1115 } 1116 } 1117 1118 /* 1119 * Cleanup. If we extended the file EOF but failed to write through 1120 * the entire write is a failure and we have to back-up. 1121 */ 1122 if (error && new_eof != old_eof) { 1123 hammer2_mtx_unlock(&ip->truncate_lock); 1124 hammer2_mtx_ex(&ip->lock); 1125 hammer2_mtx_ex(&ip->truncate_lock); 1126 hammer2_truncate_file(ip, old_eof); 1127 if (ip->flags & HAMMER2_INODE_MODIFIED) 1128 hammer2_inode_chain_sync(ip); 1129 hammer2_mtx_unlock(&ip->lock); 1130 } else if (modified) { 1131 struct vnode *vp = ip->vp; 1132 1133 hammer2_mtx_ex(&ip->lock); 1134 hammer2_inode_modify(ip); 1135 if (uio->uio_segflg == UIO_NOCOPY) { 1136 if (vp->v_flag & VLASTWRITETS) { 1137 ip->meta.mtime = 1138 (unsigned long)vp->v_lastwrite_ts.tv_sec * 1139 1000000 + 1140 vp->v_lastwrite_ts.tv_nsec / 1000; 1141 } 1142 } else { 1143 hammer2_update_time(&ip->meta.mtime); 1144 vclrflags(vp, VLASTWRITETS); 1145 } 1146 1147 #if 0 1148 /* 1149 * REMOVED - handled by hammer2_extend_file(). Do not issue 1150 * a chain_sync() outside of a sync/fsync except for DIRECTDATA 1151 * state changes. 
1152 * 1153 * Under normal conditions we only issue a chain_sync if 1154 * the inode's DIRECTDATA state changed. 1155 */ 1156 if (ip->flags & HAMMER2_INODE_RESIZED) 1157 hammer2_inode_chain_sync(ip); 1158 #endif 1159 hammer2_mtx_unlock(&ip->lock); 1160 hammer2_knote(ip->vp, kflags); 1161 } 1162 hammer2_trans_assert_strategy(ip->pmp); 1163 hammer2_mtx_unlock(&ip->truncate_lock); 1164 1165 return error; 1166 } 1167 1168 /* 1169 * Truncate the size of a file. The inode must not be locked. 1170 * 1171 * We must unconditionally set HAMMER2_INODE_RESIZED to properly 1172 * ensure that any on-media data beyond the new file EOF has been destroyed. 1173 * 1174 * WARNING: nvtruncbuf() can only be safely called without the inode lock 1175 * held due to the way our write thread works. If the truncation 1176 * occurs in the middle of a buffer, nvtruncbuf() is responsible 1177 * for dirtying that buffer and zeroing out trailing bytes. 1178 * 1179 * WARNING! Assumes that the kernel interlocks size changes at the 1180 * vnode level. 1181 * 1182 * WARNING! Caller assumes responsibility for removing dead blocks 1183 * if INODE_RESIZED is set. 1184 */ 1185 static 1186 void 1187 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1188 { 1189 hammer2_key_t lbase; 1190 int nblksize; 1191 1192 hammer2_mtx_unlock(&ip->lock); 1193 if (ip->vp) { 1194 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1195 nvtruncbuf(ip->vp, nsize, 1196 nblksize, (int)nsize & (nblksize - 1), 1197 0); 1198 } 1199 hammer2_mtx_ex(&ip->lock); 1200 KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0); 1201 ip->osize = ip->meta.size; 1202 ip->meta.size = nsize; 1203 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1204 hammer2_inode_modify(ip); 1205 } 1206 1207 /* 1208 * Extend the size of a file. The inode must not be locked. 1209 * 1210 * Even though the file size is changing, we do not have to set the 1211 * INODE_RESIZED bit unless the file size crosses the EMBEDDED_BYTES 1212 * boundary. 
* When this occurs a hammer2_inode_chain_sync() is required
 * to prepare the inode cluster's indirect block table, otherwise
 * async execution of the strategy code will implode on us.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *	    vnode level.
 *
 * WARNING! Caller assumes responsibility for transitioning out
 *	    of the inode DIRECTDATA mode if INODE_RESIZED is set.
 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	hammer2_key_t osize;
	int oblksize;
	int nblksize;

	/*
	 * Record the size change while ip->lock is still held.  The old
	 * size is kept both locally (for the buffer extension below) and
	 * in ip->osize.
	 */
	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
	hammer2_inode_modify(ip);
	osize = ip->meta.size;
	ip->osize = osize;
	ip->meta.size = nsize;

	/*
	 * We must issue a chain_sync() when the DIRECTDATA state changes
	 * to prevent confusion between the flush code and the in-memory
	 * state.  This is not perfect because we are doing it outside of
	 * a sync/fsync operation, so it might not be fully synchronized
	 * with the meta-data topology flush.
	 */
	if (osize <= HAMMER2_EMBEDDED_BYTES && nsize > HAMMER2_EMBEDDED_BYTES) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
		hammer2_inode_chain_sync(ip);
	}

	/*
	 * Release the inode lock around nvextendbuf() and take it back
	 * exclusively afterwards.  The buffer work is skipped when no
	 * vnode is attached to the inode.
	 */
	hammer2_mtx_unlock(&ip->lock);
	if (ip->vp) {
		oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvextendbuf(ip->vp,
			    osize, nsize,
			    oblksize, nblksize,
			    -1, -1, 0);
	}
	hammer2_mtx_ex(&ip->lock);
}

/*
 * Resolve the name held in ap->a_nch relative to the directory ap->a_dvp
 * by running the nresolve XOP, then attach the resulting vnode (or a
 * negative entry) to the namecache.
 */
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_xop_nresolve_t *xop;
	hammer2_inode_t *ip;
	hammer2_inode_t *dip;
	struct namecache *ncp;
	struct vnode *vp;
	int error;

	dip = VTOI(ap->a_dvp);
	xop = hammer2_xop_alloc(dip, 0);

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 */
	hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
	hammer2_xop_start(&xop->head, &hammer2_nresolve_desc);

	/* The collect result is converted to a UNIX errno before use. */
	error = hammer2_xop_collect(&xop->head, 0);
	error = hammer2_error_to_errno(error);
	if (error) {
		ip = NULL;
	} else {
		ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
	}
	hammer2_inode_unlock(dip);

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *	 entry to NULL, otherwise we just return the error and
	 *	 leave the namecache unresolved.
	 *
	 * NOTE: multiple hammer2_inode structures can be aliased to the
	 *	 same chain element, for example for hardlinks.  This
	 *	 use case does not 'reattach' inode associations that
	 *	 might already exist, but always allocates a new one.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *	    but chain was locked shared.  inode_unlock()
	 *	    will handle it properly.
1308 */ 1309 if (ip) { 1310 vp = hammer2_igetv(ip, &error); /* error set to UNIX error */ 1311 if (error == 0) { 1312 vn_unlock(vp); 1313 cache_setvp(ap->a_nch, vp); 1314 } else if (error == ENOENT) { 1315 cache_setvp(ap->a_nch, NULL); 1316 } 1317 hammer2_inode_unlock(ip); 1318 1319 /* 1320 * The vp should not be released until after we've disposed 1321 * of our locks, because it might cause vop_inactive() to 1322 * be called. 1323 */ 1324 if (vp) 1325 vrele(vp); 1326 } else { 1327 error = ENOENT; 1328 cache_setvp(ap->a_nch, NULL); 1329 } 1330 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP); 1331 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1332 ("resolve error %d/%p ap %p\n", 1333 error, ap->a_nch->ncp->nc_vp, ap)); 1334 1335 return error; 1336 } 1337 1338 static 1339 int 1340 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1341 { 1342 hammer2_inode_t *dip; 1343 hammer2_tid_t inum; 1344 int error; 1345 1346 dip = VTOI(ap->a_dvp); 1347 inum = dip->meta.iparent; 1348 *ap->a_vpp = NULL; 1349 1350 if (inum) { 1351 error = hammer2_vfs_vget(ap->a_dvp->v_mount, NULL, 1352 inum, ap->a_vpp); 1353 } else { 1354 error = ENOENT; 1355 } 1356 return error; 1357 } 1358 1359 static 1360 int 1361 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1362 { 1363 hammer2_inode_t *dip; 1364 hammer2_inode_t *nip; 1365 struct namecache *ncp; 1366 const uint8_t *name; 1367 size_t name_len; 1368 hammer2_tid_t inum; 1369 int error; 1370 1371 dip = VTOI(ap->a_dvp); 1372 if (dip->pmp->ronly) 1373 return (EROFS); 1374 if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1) 1375 return (ENOSPC); 1376 1377 ncp = ap->a_nch->ncp; 1378 name = ncp->nc_name; 1379 name_len = ncp->nc_nlen; 1380 1381 /*hammer2_pfs_memory_wait(dip->pmp);*/ 1382 hammer2_trans_init(dip->pmp, 0); 1383 1384 inum = hammer2_trans_newinum(dip->pmp); 1385 1386 /* 1387 * Create the actual inode as a hidden file in the iroot, then 1388 * create the directory entry. 
The creation of the actual inode 1389 * sets its nlinks to 1 which is the value we desire. 1390 * 1391 * dip must be locked before nip to avoid deadlock. 1392 */ 1393 hammer2_inode_lock(dip, 0); 1394 nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred, 1395 inum, &error); 1396 if (error) { 1397 error = hammer2_error_to_errno(error); 1398 } else { 1399 error = hammer2_dirent_create(dip, name, name_len, 1400 nip->meta.inum, nip->meta.type); 1401 /* returns UNIX error code */ 1402 } 1403 if (error) { 1404 if (nip) { 1405 hammer2_inode_unlink_finisher(nip, 0); 1406 hammer2_inode_unlock(nip); 1407 nip = NULL; 1408 } 1409 *ap->a_vpp = NULL; 1410 } else { 1411 /* 1412 * inode_depend() must occur before the igetv() because 1413 * the igetv() can temporarily release the inode lock. 1414 */ 1415 hammer2_inode_depend(dip, nip); /* before igetv */ 1416 *ap->a_vpp = hammer2_igetv(nip, &error); 1417 hammer2_inode_unlock(nip); 1418 } 1419 1420 /* 1421 * Update dip's mtime 1422 * 1423 * We can use a shared inode lock and allow the meta.mtime update 1424 * SMP race. hammer2_inode_modify() is MPSAFE w/a shared lock. 
1425 */ 1426 if (error == 0) { 1427 uint64_t mtime; 1428 1429 /*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/ 1430 hammer2_update_time(&mtime); 1431 hammer2_inode_modify(dip); 1432 dip->meta.mtime = mtime; 1433 /*hammer2_inode_unlock(dip);*/ 1434 } 1435 hammer2_inode_unlock(dip); 1436 1437 hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ); 1438 1439 if (error == 0) { 1440 cache_setunresolved(ap->a_nch); 1441 cache_setvp(ap->a_nch, *ap->a_vpp); 1442 hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK); 1443 } 1444 return error; 1445 } 1446 1447 static 1448 int 1449 hammer2_vop_open(struct vop_open_args *ap) 1450 { 1451 return vop_stdopen(ap); 1452 } 1453 1454 /* 1455 * hammer2_vop_advlock { vp, id, op, fl, flags } 1456 */ 1457 static 1458 int 1459 hammer2_vop_advlock(struct vop_advlock_args *ap) 1460 { 1461 hammer2_inode_t *ip = VTOI(ap->a_vp); 1462 hammer2_off_t size; 1463 1464 size = ip->meta.size; 1465 return (lf_advlock(ap, &ip->advlock, size)); 1466 } 1467 1468 static 1469 int 1470 hammer2_vop_close(struct vop_close_args *ap) 1471 { 1472 return vop_stdclose(ap); 1473 } 1474 1475 /* 1476 * hammer2_vop_nlink { nch, dvp, vp, cred } 1477 * 1478 * Create a hardlink from (vp) to {dvp, nch}. 1479 */ 1480 static 1481 int 1482 hammer2_vop_nlink(struct vop_nlink_args *ap) 1483 { 1484 hammer2_inode_t *tdip; /* target directory to create link in */ 1485 hammer2_inode_t *ip; /* inode we are hardlinking to */ 1486 struct namecache *ncp; 1487 const uint8_t *name; 1488 size_t name_len; 1489 int error; 1490 1491 if (ap->a_dvp->v_mount != ap->a_vp->v_mount) 1492 return(EXDEV); 1493 1494 tdip = VTOI(ap->a_dvp); 1495 if (tdip->pmp->ronly) 1496 return (EROFS); 1497 if (hammer2_vfs_enospace(tdip, 0, ap->a_cred) > 1) 1498 return (ENOSPC); 1499 1500 ncp = ap->a_nch->ncp; 1501 name = ncp->nc_name; 1502 name_len = ncp->nc_nlen; 1503 1504 /* 1505 * ip represents the file being hardlinked. The file could be a 1506 * normal file or a hardlink target if it has already been hardlinked. 
1507 * (with the new semantics, it will almost always be a hardlink 1508 * target). 1509 * 1510 * Bump nlinks and potentially also create or move the hardlink 1511 * target in the parent directory common to (ip) and (tdip). The 1512 * consolidation code can modify ip->cluster. The returned cluster 1513 * is locked. 1514 */ 1515 ip = VTOI(ap->a_vp); 1516 KASSERT(ip->pmp, ("ip->pmp is NULL %p %p", ip, ip->pmp)); 1517 /*hammer2_pfs_memory_wait(ip->pmp);*/ 1518 hammer2_trans_init(ip->pmp, 0); 1519 1520 /* 1521 * Target should be an indexed inode or there's no way we will ever 1522 * be able to find it! 1523 */ 1524 KKASSERT((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0); 1525 1526 error = 0; 1527 1528 /* 1529 * Can return NULL and error == EXDEV if the common parent 1530 * crosses a directory with the xlink flag set. 1531 */ 1532 hammer2_inode_lock4(tdip, ip, NULL, NULL); 1533 1534 /* 1535 * Create the directory entry and bump nlinks. 1536 */ 1537 if (error == 0) { 1538 error = hammer2_dirent_create(tdip, name, name_len, 1539 ip->meta.inum, ip->meta.type); 1540 hammer2_inode_modify(ip); 1541 ++ip->meta.nlinks; 1542 } 1543 if (error == 0) { 1544 /* 1545 * Update dip's mtime 1546 */ 1547 uint64_t mtime; 1548 1549 hammer2_update_time(&mtime); 1550 hammer2_inode_modify(tdip); 1551 tdip->meta.mtime = mtime; 1552 1553 cache_setunresolved(ap->a_nch); 1554 cache_setvp(ap->a_nch, ap->a_vp); 1555 } 1556 hammer2_inode_unlock(ip); 1557 hammer2_inode_unlock(tdip); 1558 1559 hammer2_trans_done(ip->pmp, HAMMER2_TRANS_SIDEQ); 1560 hammer2_knote(ap->a_vp, NOTE_LINK); 1561 hammer2_knote(ap->a_dvp, NOTE_WRITE); 1562 1563 return error; 1564 } 1565 1566 /* 1567 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap } 1568 * 1569 * The operating system has already ensured that the directory entry 1570 * does not exist and done all appropriate namespace locking. 
1571 */ 1572 static 1573 int 1574 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1575 { 1576 hammer2_inode_t *dip; 1577 hammer2_inode_t *nip; 1578 struct namecache *ncp; 1579 const uint8_t *name; 1580 size_t name_len; 1581 hammer2_tid_t inum; 1582 int error; 1583 1584 dip = VTOI(ap->a_dvp); 1585 if (dip->pmp->ronly) 1586 return (EROFS); 1587 if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1) 1588 return (ENOSPC); 1589 1590 ncp = ap->a_nch->ncp; 1591 name = ncp->nc_name; 1592 name_len = ncp->nc_nlen; 1593 /*hammer2_pfs_memory_wait(dip->pmp);*/ 1594 hammer2_trans_init(dip->pmp, 0); 1595 1596 inum = hammer2_trans_newinum(dip->pmp); 1597 1598 /* 1599 * Create the actual inode as a hidden file in the iroot, then 1600 * create the directory entry. The creation of the actual inode 1601 * sets its nlinks to 1 which is the value we desire. 1602 * 1603 * dip must be locked before nip to avoid deadlock. 1604 */ 1605 hammer2_inode_lock(dip, 0); 1606 nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred, 1607 inum, &error); 1608 1609 if (error) { 1610 error = hammer2_error_to_errno(error); 1611 } else { 1612 error = hammer2_dirent_create(dip, name, name_len, 1613 nip->meta.inum, nip->meta.type); 1614 } 1615 if (error) { 1616 if (nip) { 1617 hammer2_inode_unlink_finisher(nip, 0); 1618 hammer2_inode_unlock(nip); 1619 nip = NULL; 1620 } 1621 *ap->a_vpp = NULL; 1622 } else { 1623 hammer2_inode_depend(dip, nip); /* before igetv */ 1624 *ap->a_vpp = hammer2_igetv(nip, &error); 1625 hammer2_inode_unlock(nip); 1626 } 1627 1628 /* 1629 * Update dip's mtime 1630 */ 1631 if (error == 0) { 1632 uint64_t mtime; 1633 1634 /*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/ 1635 hammer2_update_time(&mtime); 1636 hammer2_inode_modify(dip); 1637 dip->meta.mtime = mtime; 1638 /*hammer2_inode_unlock(dip);*/ 1639 } 1640 hammer2_inode_unlock(dip); 1641 1642 hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ); 1643 1644 if (error == 0) { 1645 cache_setunresolved(ap->a_nch); 1646 
cache_setvp(ap->a_nch, *ap->a_vpp); 1647 hammer2_knote(ap->a_dvp, NOTE_WRITE); 1648 } 1649 return error; 1650 } 1651 1652 /* 1653 * Make a device node (typically a fifo) 1654 */ 1655 static 1656 int 1657 hammer2_vop_nmknod(struct vop_nmknod_args *ap) 1658 { 1659 hammer2_inode_t *dip; 1660 hammer2_inode_t *nip; 1661 struct namecache *ncp; 1662 const uint8_t *name; 1663 size_t name_len; 1664 hammer2_tid_t inum; 1665 int error; 1666 1667 dip = VTOI(ap->a_dvp); 1668 if (dip->pmp->ronly) 1669 return (EROFS); 1670 if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1) 1671 return (ENOSPC); 1672 1673 ncp = ap->a_nch->ncp; 1674 name = ncp->nc_name; 1675 name_len = ncp->nc_nlen; 1676 /*hammer2_pfs_memory_wait(dip->pmp);*/ 1677 hammer2_trans_init(dip->pmp, 0); 1678 1679 /* 1680 * Create the device inode and then create the directory entry. 1681 * 1682 * dip must be locked before nip to avoid deadlock. 1683 */ 1684 inum = hammer2_trans_newinum(dip->pmp); 1685 1686 hammer2_inode_lock(dip, 0); 1687 nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred, 1688 inum, &error); 1689 if (error == 0) { 1690 error = hammer2_dirent_create(dip, name, name_len, 1691 nip->meta.inum, nip->meta.type); 1692 } 1693 if (error) { 1694 if (nip) { 1695 hammer2_inode_unlink_finisher(nip, 0); 1696 hammer2_inode_unlock(nip); 1697 nip = NULL; 1698 } 1699 *ap->a_vpp = NULL; 1700 } else { 1701 hammer2_inode_depend(dip, nip); /* before igetv */ 1702 *ap->a_vpp = hammer2_igetv(nip, &error); 1703 hammer2_inode_unlock(nip); 1704 } 1705 1706 /* 1707 * Update dip's mtime 1708 */ 1709 if (error == 0) { 1710 uint64_t mtime; 1711 1712 /*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/ 1713 hammer2_update_time(&mtime); 1714 hammer2_inode_modify(dip); 1715 dip->meta.mtime = mtime; 1716 /*hammer2_inode_unlock(dip);*/ 1717 } 1718 hammer2_inode_unlock(dip); 1719 1720 hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ); 1721 1722 if (error == 0) { 1723 cache_setunresolved(ap->a_nch); 1724 cache_setvp(ap->a_nch, 
*ap->a_vpp); 1725 hammer2_knote(ap->a_dvp, NOTE_WRITE); 1726 } 1727 return error; 1728 } 1729 1730 /* 1731 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1732 */ 1733 static 1734 int 1735 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1736 { 1737 hammer2_inode_t *dip; 1738 hammer2_inode_t *nip; 1739 struct namecache *ncp; 1740 const uint8_t *name; 1741 size_t name_len; 1742 hammer2_tid_t inum; 1743 int error; 1744 1745 dip = VTOI(ap->a_dvp); 1746 if (dip->pmp->ronly) 1747 return (EROFS); 1748 if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1) 1749 return (ENOSPC); 1750 1751 ncp = ap->a_nch->ncp; 1752 name = ncp->nc_name; 1753 name_len = ncp->nc_nlen; 1754 /*hammer2_pfs_memory_wait(dip->pmp);*/ 1755 hammer2_trans_init(dip->pmp, 0); 1756 1757 ap->a_vap->va_type = VLNK; /* enforce type */ 1758 1759 /* 1760 * Create the softlink as an inode and then create the directory 1761 * entry. 1762 * 1763 * dip must be locked before nip to avoid deadlock. 1764 */ 1765 inum = hammer2_trans_newinum(dip->pmp); 1766 1767 hammer2_inode_lock(dip, 0); 1768 nip = hammer2_inode_create_normal(dip, ap->a_vap, ap->a_cred, 1769 inum, &error); 1770 if (error == 0) { 1771 error = hammer2_dirent_create(dip, name, name_len, 1772 nip->meta.inum, nip->meta.type); 1773 } 1774 if (error) { 1775 if (nip) { 1776 hammer2_inode_unlink_finisher(nip, 0); 1777 hammer2_inode_unlock(nip); 1778 nip = NULL; 1779 } 1780 *ap->a_vpp = NULL; 1781 hammer2_inode_unlock(dip); 1782 hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ); 1783 return error; 1784 } 1785 hammer2_inode_depend(dip, nip); /* before igetv */ 1786 *ap->a_vpp = hammer2_igetv(nip, &error); 1787 1788 /* 1789 * Build the softlink (~like file data) and finalize the namecache. 
*/
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;

		bytes = strlen(ap->a_target);

		/*
		 * The inode is unlocked before the write; the target
		 * string is then written through a kernel-space uio.
		 */
		hammer2_inode_unlock(nip);
		bzero(&auio, sizeof(auio));
		bzero(&aiov, sizeof(aiov));
		auio.uio_iov = &aiov;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_resid = bytes;
		auio.uio_iovcnt = 1;
		auio.uio_td = curthread;
		aiov.iov_base = ap->a_target;
		aiov.iov_len = bytes;
		error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
		/*
		 * XXX handle error.  NOTE(review): a failure writing the
		 * link target is currently discarded and the symlink is
		 * reported as created.
		 */
		error = 0;
	} else {
		hammer2_inode_unlock(nip);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		hammer2_knote(ap->a_dvp, NOTE_WRITE);
	}
	return error;
}

/*
 * hammer2_vop_nremove { nch, dvp, cred }
 *
 * Remove the non-directory entry named by ap->a_nch from the directory
 * ap->a_dvp.  Returns EROFS on a read-only PFS, otherwise the errno
 * translated from the unlink XOP.  On success the namecache entry is
 * unlinked and NOTE_WRITE is posted on the directory.
 */
static
int
hammer2_vop_nremove(struct vop_nremove_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct namecache *ncp;
	int error;
	int isopen;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);
#if 0
	/* allow removals, except user to also bulkfree */
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);
#endif

	ncp = ap->a_nch->ncp;

	/*
	 * Debugging aid: when hammer2_debug_inode matches this directory,
	 * log the attempt and sleep in a loop for as long as it matches.
	 */
	if (hammer2_debug_inode && dip->meta.inum == hammer2_debug_inode) {
		kprintf("hammer2: attempt to delete inside debug inode: %s\n",
			ncp->nc_name);
		while (hammer2_debug_inode &&
		       dip->meta.inum == hammer2_debug_inode) {
			tsleep(&hammer2_debug_inode, 0, "h2debug", hz*5);
		}
	}

	/*hammer2_pfs_memory_wait(dip->pmp);*/
	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, 0);

	/*
	 * The unlink XOP unlinks the path from the directory and
	 * locates and returns the cluster associated with the real inode.
	 * We have to handle nlinks here on the frontend.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

	/*
	 * The namecache entry is locked so nobody can use this namespace.
	 * Calculate isopen to determine if this namespace has an open vp
	 * associated with it and resolve the vp only if it does.
	 *
	 * We try to avoid resolving the vnode if nobody has it open, but
	 * note that the test is via this namespace only.
	 */
	isopen = cache_isopen(ap->a_nch);
	xop->isdir = 0;
	xop->dopermanent = 0;
	hammer2_xop_start(&xop->head, &hammer2_unlink_desc);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	error = hammer2_error_to_errno(error);

	/* The XOP is retired on both the success and the failure path. */
	if (error == 0) {
		ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			/* Same debugging aid as above, for the inode itself */
			if (hammer2_debug_inode &&
			    ip->meta.inum == hammer2_debug_inode) {
				kprintf("hammer2: attempt to delete debug "
					"inode!\n");
				while (hammer2_debug_inode &&
				       ip->meta.inum == hammer2_debug_inode) {
					tsleep(&hammer2_debug_inode, 0,
					       "h2debug", hz*5);
				}
			}
			hammer2_inode_unlink_finisher(ip, isopen);
			hammer2_inode_depend(dip, ip); /* after modified */
			hammer2_inode_unlock(ip);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);
	if (error == 0) {
		cache_unlink(ap->a_nch);
		hammer2_knote(ap->a_dvp, NOTE_WRITE);
	}
	return (error);
}

/*
 * hammer2_vop_nrmdir { nch, dvp, cred }
 *
 * Remove the directory named by ap->a_nch from ap->a_dvp.  Same flow as
 * hammer2_vop_nremove() but the unlink XOP is issued with isdir set.
 */
static
int
hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct namecache *ncp;
	int isopen;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);
#if 0
	/* allow removals, except user to also bulkfree */
	if (hammer2_vfs_enospace(dip, 0, ap->a_cred) > 1)
		return (ENOSPC);
#endif

	/*hammer2_pfs_memory_wait(dip->pmp);*/
	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, 0);

	xop = hammer2_xop_alloc(dip,
HAMMER2_XOP_MODIFYING);

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
	isopen = cache_isopen(ap->a_nch);
	xop->isdir = 1;
	xop->dopermanent = 0;
	hammer2_xop_start(&xop->head, &hammer2_unlink_desc);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	error = hammer2_error_to_errno(error);

	/* The XOP is retired on both the success and the failure path. */
	if (error == 0) {
		ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			hammer2_inode_unlink_finisher(ip, isopen);
			hammer2_inode_depend(dip, ip);	/* after modified */
			hammer2_inode_unlock(ip);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}

	/*
	 * Update dip's mtime
	 */
	if (error == 0) {
		uint64_t mtime;

		/*hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);*/
		hammer2_update_time(&mtime);
		hammer2_inode_modify(dip);
		dip->meta.mtime = mtime;
		/*hammer2_inode_unlock(dip);*/
	}
	hammer2_inode_unlock(dip);

	hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);
	if (error == 0) {
		cache_unlink(ap->a_nch);
		hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
	}
	return (error);
}

/*
 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 *
 * Rename the entry fnch in directory fdvp to the entry tnch in directory
 * tdvp, replacing any existing target.  Returns EXDEV when the vnodes
 * involved are not all on the same mount, EROFS on a read-only PFS and
 * ENOSPC when hammer2_vfs_enospace() returns a value greater than 1 or
 * the directory hash collision space for the target name is exhausted.
 */
static
int
hammer2_vop_nrename(struct vop_nrename_args *ap)
{
	struct namecache *fncp;
	struct namecache *tncp;
	hammer2_inode_t *fdip;	/* source directory */
	hammer2_inode_t *tdip;	/* target directory */
	hammer2_inode_t *ip;	/* file being renamed */
	hammer2_inode_t *tip;	/* replaced target during rename or NULL */
	const uint8_t *fname;
	size_t fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;
	int update_tdip;
	int update_fdip;
	hammer2_key_t tlhc;

	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	if (fdip->pmp->ronly)
		return (EROFS);
	if (hammer2_vfs_enospace(fdip, 0, ap->a_cred) > 1)
		return (ENOSPC);

	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	/*hammer2_pfs_memory_wait(tdip->pmp);*/
	hammer2_trans_init(tdip->pmp, 0);

	update_tdip = 0;
	update_fdip = 0;

	ip = VTOI(fncp->nc_vp);
	hammer2_inode_ref(ip);		/* extra ref */

	/*
	 * Lookup the target name to determine if a directory entry
	 * is being overwritten.  We only hold related inode locks
	 * temporarily, the operating system is expected to protect
	 * against rename races.
	 */
	tip = tncp->nc_vp ? VTOI(tncp->nc_vp) : NULL;
	if (tip)
		hammer2_inode_ref(tip);	/* extra ref */

	/*
	 * Can return NULL and error == EXDEV if the common parent
	 * crosses a directory with the xlink flag set.
	 *
	 * For now try to avoid deadlocks with a simple pointer address
	 * test.  (tip) can be NULL.
	 */
	error = 0;
	{
		hammer2_inode_t *ip1 = fdip;
		hammer2_inode_t *ip2 = tdip;
		hammer2_inode_t *ip3 = ip;
		hammer2_inode_t *ip4 = tip;	/* may be NULL */

		/* Order each pair by address before locking all four */
		if (fdip > tdip) {
			ip1 = tdip;
			ip2 = fdip;
		}
		if (tip && ip > tip) {
			ip3 = tip;
			ip4 = ip;
		}
		hammer2_inode_lock4(ip1, ip2, ip3, ip4);
	}

	/*
	 * Resolve the collision space for (tdip, tname, tname_len)
	 *
	 * tdip must be held exclusively locked to prevent races since
	 * multiple filenames can end up in the same collision space.
	 */
	{
		hammer2_xop_scanlhc_t *sxop;
		hammer2_tid_t lhcbase;

		tlhc = hammer2_dirhash(tname, tname_len);
		lhcbase = tlhc;
		sxop = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = tlhc;
		hammer2_xop_start(&sxop->head, &hammer2_scanlhc_desc);

		/*
		 * Advance tlhc past every key the scan returns in
		 * sequence; stop at the first gap.
		 */
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (tlhc != sxop->head.cluster.focus->bref.key)
				break;
			++tlhc;
		}
		error = hammer2_error_to_errno(error);
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		/* ENOENT from the scan is not treated as a failure */
		if (error) {
			if (error != ENOENT)
				goto done2;
			++tlhc;
			error = 0;
		}

		/*
		 * Fail if tlhc now differs from the original hash in any
		 * bit outside HAMMER2_DIRHASH_LOMASK.
		 */
		if ((lhcbase ^ tlhc) & ~HAMMER2_DIRHASH_LOMASK) {
			error = ENOSPC;
			goto done2;
		}
	}

	/*
	 * Ready to go, issue the rename to the backend.  Note that meta-data
	 * updates to the related inodes occur separately from the rename
	 * operation.
	 *
	 * NOTE: While it is not necessary to update ip->meta.name*, doing
	 *	 so aids catastrophic recovery and debugging.
	 */
	if (error == 0) {
		hammer2_xop_nrename_t *xop4;

		xop4 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
		xop4->lhc = tlhc;
		xop4->ip_key = ip->meta.name_key;
		hammer2_xop_setip2(&xop4->head, ip);
		hammer2_xop_setip3(&xop4->head, tdip);
		hammer2_xop_setname(&xop4->head, fname, fname_len);
		hammer2_xop_setname2(&xop4->head, tname, tname_len);
		hammer2_xop_start(&xop4->head, &hammer2_nrename_desc);

		error = hammer2_xop_collect(&xop4->head, 0);
		error = hammer2_error_to_errno(error);
		hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);

		/* ENOENT from the rename XOP is treated as success */
		if (error == ENOENT)
			error = 0;

		/*
		 * Update inode meta-data.
		 *
		 * WARNING!  The in-memory inode (ip) structure does not
		 *	     maintain a copy of the inode's filename buffer.
		 */
		if (error == 0 &&
		    (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
			hammer2_inode_modify(ip);
			ip->meta.name_len = tname_len;
			ip->meta.name_key = tlhc;
		}
		if (error == 0) {
			hammer2_inode_modify(ip);
			ip->meta.iparent = tdip->meta.inum;
		}
		update_fdip = 1;
		update_tdip = 1;
	}

done2:
	/*
	 * If no error, the backend has replaced the target directory entry.
	 * We must adjust nlinks on the original replace target if it exists.
	 */
	if (error == 0 && tip) {
		int isopen;

		isopen = cache_isopen(ap->a_tnch);
		hammer2_inode_unlink_finisher(tip, isopen);
	}

	/*
	 * Update directory mtimes to represent that something changed.
	 */
	if (update_fdip || update_tdip) {
		uint64_t mtime;

		hammer2_update_time(&mtime);
		if (update_fdip) {
			hammer2_inode_modify(fdip);
			fdip->meta.mtime = mtime;
		}
		if (update_tdip) {
			hammer2_inode_modify(tdip);
			tdip->meta.mtime = mtime;
		}
	}

	/* Release the locks and the extra references taken above */
	if (tip) {
		hammer2_inode_unlock(tip);
		hammer2_inode_drop(tip);
	}
	hammer2_inode_unlock(ip);
	hammer2_inode_unlock(tdip);
	hammer2_inode_unlock(fdip);
	hammer2_inode_drop(ip);
	hammer2_trans_done(tdip->pmp, HAMMER2_TRANS_SIDEQ);

	/*
	 * Issue the namecache update after unlocking all the internal
	 * hammer2 structures, otherwise we might deadlock.
	 *
	 * WARNING! The target namespace must be updated atomically,
	 *	    and we depend on cache_rename() to handle that for
	 *	    us.  Do not do a separate cache_unlink() because
	 *	    that leaves a small window of opportunity for other
	 *	    threads to allocate the target namespace before we
	 *	    manage to complete our rename.
	 *
	 * WARNING! cache_rename() (and cache_unlink()) will properly
	 *	    set VREF_FINALIZE on any attached vnode.  Do not
	 *	    call cache_setunresolved() manually before-hand as
	 *	    this will prevent the flag from being set later via
	 *	    cache_rename().  If VREF_FINALIZE is not properly set
	 *	    and the inode is no longer in the topology, related
	 *	    chains can remain dirty indefinitely.
	 */
	if (error == 0 && tip) {
		/*cache_unlink(ap->a_tnch); see above */
		/*cache_setunresolved(ap->a_tnch); see above */
	}
	if (error == 0) {
		cache_rename(ap->a_fnch, ap->a_tnch);
		hammer2_knote(ap->a_fdvp, NOTE_WRITE);
		hammer2_knote(ap->a_tdvp, NOTE_WRITE);
		hammer2_knote(fncp->nc_vp, NOTE_RENAME);
	}

	return (error);
}

/*
 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
 *
 * Forwards the request for the vnode's inode to hammer2_ioctl() and
 * returns its result.
 */
static
int
hammer2_vop_ioctl(struct vop_ioctl_args *ap)
{
	hammer2_inode_t *ip;
	int error;

	ip = VTOI(ap->a_vp);

	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
			      ap->a_fflag, ap->a_cred);
	return (error);
}

/*
 * Mount control.  MOUNTCTL_SET_EXPORT is handled here: the control
 * buffer must be exactly sizeof(struct export_args) (else EINVAL) and is
 * passed to vfs_export().  Every other operation goes to
 * vop_stdmountctl().
 */
static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	hammer2_pfs_t *pmp;
	int rc;

	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		mp = ap->a_head.a_ops->head.vv_mount;
		pmp = MPTOPMP(mp);

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = (EINVAL);
		else
			rc = vfs_export(mp, &pmp->export,
					(const struct export_args *)ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}
	return (rc);
}

/*
 * KQFILTER
 *
 * One filterops table per supported filter type; all three share the
 * same detach routine.
 */
static void filt_hammer2detach(struct knote *kn);
static int filt_hammer2read(struct knote *kn, long hint);
static int filt_hammer2write(struct knote *kn, long hint);
static int filt_hammer2vnode(struct knote *kn, long hint);

static struct filterops hammer2read_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2read };
static struct filterops hammer2write_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2write };
static struct filterops hammer2vnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach,
filt_hammer2vnode }; 2332 2333 static 2334 int 2335 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap) 2336 { 2337 struct vnode *vp = ap->a_vp; 2338 struct knote *kn = ap->a_kn; 2339 2340 switch (kn->kn_filter) { 2341 case EVFILT_READ: 2342 kn->kn_fop = &hammer2read_filtops; 2343 break; 2344 case EVFILT_WRITE: 2345 kn->kn_fop = &hammer2write_filtops; 2346 break; 2347 case EVFILT_VNODE: 2348 kn->kn_fop = &hammer2vnode_filtops; 2349 break; 2350 default: 2351 return (EOPNOTSUPP); 2352 } 2353 2354 kn->kn_hook = (caddr_t)vp; 2355 2356 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 2357 2358 return(0); 2359 } 2360 2361 static void 2362 filt_hammer2detach(struct knote *kn) 2363 { 2364 struct vnode *vp = (void *)kn->kn_hook; 2365 2366 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 2367 } 2368 2369 static int 2370 filt_hammer2read(struct knote *kn, long hint) 2371 { 2372 struct vnode *vp = (void *)kn->kn_hook; 2373 hammer2_inode_t *ip = VTOI(vp); 2374 off_t off; 2375 2376 if (hint == NOTE_REVOKE) { 2377 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2378 return(1); 2379 } 2380 off = ip->meta.size - kn->kn_fp->f_offset; 2381 kn->kn_data = (off < INTPTR_MAX) ? 
off : INTPTR_MAX; 2382 if (kn->kn_sfflags & NOTE_OLDAPI) 2383 return(1); 2384 return (kn->kn_data != 0); 2385 } 2386 2387 /* * filt_hammer2write * * Always returns 1 with kn_data set to 0. When hint is NOTE_REVOKE it * additionally sets EV_EOF | EV_NODATA | EV_ONESHOT in kn_flags. */ 2388 static int 2389 filt_hammer2write(struct knote *kn, long hint) 2390 { 2391 if (hint == NOTE_REVOKE) 2392 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2393 kn->kn_data = 0; 2394 return (1); 2395 } 2396 /* * filt_hammer2vnode * * When hint shares any bit with kn_sfflags, the whole hint is OR-ed * into kn_fflags. NOTE_REVOKE sets EV_EOF | EV_NODATA in kn_flags and * returns 1; any other hint returns nonzero only if kn_fflags is * nonzero. */ 2397 static int 2398 filt_hammer2vnode(struct knote *kn, long hint) 2399 { 2400 if (kn->kn_sfflags & hint) 2401 kn->kn_fflags |= hint; 2402 if (hint == NOTE_REVOKE) { 2403 kn->kn_flags |= (EV_EOF | EV_NODATA); 2404 return (1); 2405 } 2406 return (kn->kn_fflags != 0); 2407 } 2408 2409 /* 2410 * FIFO VOPS * * hammer2_vop_markatime returns EROFS when ip->pmp->ronly is set and 0 * otherwise; it modifies nothing itself. It is installed in both * hammer2_spec_vops and hammer2_fifo_vops. 2411 */ 2412 static 2413 int 2414 hammer2_vop_markatime(struct vop_markatime_args *ap) 2415 { 2416 hammer2_inode_t *ip; 2417 struct vnode *vp; 2418 2419 vp = ap->a_vp; 2420 ip = VTOI(vp); 2421 2422 if (ip->pmp->ronly) 2423 return (EROFS); 2424 return(0); 2425 } 2426 /* * hammer2_vop_fifokqfilter * * Dispatches the request to fifo_vnode_vops through VOCALL() first. * Only if that returns an error is hammer2_vop_kqfilter() called, and * its result is returned instead. */ 2427 static 2428 int 2429 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap) 2430 { 2431 int error; 2432 2433 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 2434 if (error) 2435 error = hammer2_vop_kqfilter(ap); 2436 return(error); 2437 } 2438 2439 /* 2440 * VOPS vector * * hammer2_vnode_vops uses vop_defaultop as vop_default and the * vop_stdgetpages / vop_stdputpages helpers for paging; every other * entry is a hammer2_vop_* handler from this file. 2441 */ 2442 struct vop_ops hammer2_vnode_vops = { 2443 .vop_default = vop_defaultop, 2444 .vop_fsync = hammer2_vop_fsync, 2445 .vop_getpages = vop_stdgetpages, 2446 .vop_putpages = vop_stdputpages, 2447 .vop_access = hammer2_vop_access, 2448 .vop_advlock = hammer2_vop_advlock, 2449 .vop_close = hammer2_vop_close, 2450 .vop_nlink = hammer2_vop_nlink, 2451 .vop_ncreate = hammer2_vop_ncreate, 2452 .vop_nsymlink = hammer2_vop_nsymlink, 2453 .vop_nremove = hammer2_vop_nremove, 2454 .vop_nrmdir = hammer2_vop_nrmdir, 2455 .vop_nrename = hammer2_vop_nrename, 2456 .vop_getattr = hammer2_vop_getattr, 2457 .vop_setattr = hammer2_vop_setattr, 2458 .vop_readdir = hammer2_vop_readdir, 2459 .vop_readlink = hammer2_vop_readlink, 2460 .vop_read = hammer2_vop_read, 2461 .vop_write = hammer2_vop_write, 2462 .vop_open = 
hammer2_vop_open, 2463 .vop_inactive = hammer2_vop_inactive, 2464 .vop_reclaim = hammer2_vop_reclaim, 2465 .vop_nresolve = hammer2_vop_nresolve, 2466 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, 2467 .vop_nmkdir = hammer2_vop_nmkdir, 2468 .vop_nmknod = hammer2_vop_nmknod, 2469 .vop_ioctl = hammer2_vop_ioctl, 2470 .vop_mountctl = hammer2_vop_mountctl, 2471 .vop_bmap = hammer2_vop_bmap, 2472 .vop_strategy = hammer2_vop_strategy, 2473 .vop_kqfilter = hammer2_vop_kqfilter 2474 }; 2475 /* * hammer2_spec_vops * * vop_default is vop_defaultop; read and write are wired to * vop_stdnoread / vop_stdnowrite. The remaining entries reuse the * hammer2_vop_* handlers, including hammer2_vop_markatime. */ 2476 struct vop_ops hammer2_spec_vops = { 2477 .vop_default = vop_defaultop, 2478 .vop_fsync = hammer2_vop_fsync, 2479 .vop_read = vop_stdnoread, 2480 .vop_write = vop_stdnowrite, 2481 .vop_access = hammer2_vop_access, 2482 .vop_close = hammer2_vop_close, 2483 .vop_markatime = hammer2_vop_markatime, 2484 .vop_getattr = hammer2_vop_getattr, 2485 .vop_inactive = hammer2_vop_inactive, 2486 .vop_reclaim = hammer2_vop_reclaim, 2487 .vop_setattr = hammer2_vop_setattr 2488 }; 2489 /* * hammer2_fifo_vops * * vop_default is fifo_vnoperate. The hammer2-specific fifo read, write * and close entries are compiled out with #if 0, so no explicit * handler is installed for those three operations. kqfilter goes * through hammer2_vop_fifokqfilter. */ 2490 struct vop_ops hammer2_fifo_vops = { 2491 .vop_default = fifo_vnoperate, 2492 .vop_fsync = hammer2_vop_fsync, 2493 #if 0 2494 .vop_read = hammer2_vop_fiforead, 2495 .vop_write = hammer2_vop_fifowrite, 2496 #endif 2497 .vop_access = hammer2_vop_access, 2498 #if 0 2499 .vop_close = hammer2_vop_fifoclose, 2500 #endif 2501 .vop_markatime = hammer2_vop_markatime, 2502 .vop_getattr = hammer2_vop_getattr, 2503 .vop_inactive = hammer2_vop_inactive, 2504 .vop_reclaim = hammer2_vop_reclaim, 2505 .vop_setattr = hammer2_vop_setattr, 2506 .vop_kqfilter = hammer2_vop_fifokqfilter 2507 }; 2508 2509