1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * Kernel Filesystem interface 38 * 39 * NOTE! 
local ipdata pointers must be reloaded on any modifying operation 40 * to the inode as its underlying chain may have changed. 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/buf.h> 48 #include <sys/proc.h> 49 #include <sys/namei.h> 50 #include <sys/mount.h> 51 #include <sys/vnode.h> 52 #include <sys/mountctl.h> 53 #include <sys/dirent.h> 54 #include <sys/uio.h> 55 #include <sys/objcache.h> 56 #include <sys/event.h> 57 #include <sys/file.h> 58 #include <vfs/fifofs/fifo.h> 59 60 #include "hammer2.h" 61 #include "hammer2_lz4.h" 62 63 #include "zlib/hammer2_zlib.h" 64 65 #define ZFOFFSET (-2LL) 66 67 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 68 int seqcount); 69 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 70 int ioflag, int seqcount); 71 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize); 72 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize); 73 74 struct objcache *cache_buffer_read; 75 struct objcache *cache_buffer_write; 76 77 /* 78 * Callback used in read path in case that a block is compressed with LZ4. 79 */ 80 static 81 void 82 hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio) 83 { 84 struct buf *bp; 85 char *compressed_buffer; 86 int compressed_size; 87 int result; 88 89 bp = bio->bio_buf; 90 91 #if 0 92 if bio->bio_caller_info2.index && 93 bio->bio_caller_info1.uvalue32 != 94 crc32(bp->b_data, bp->b_bufsize) --- return error 95 #endif 96 97 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE); 98 compressed_size = *(const int *)data; 99 KKASSERT(compressed_size <= bytes - sizeof(int)); 100 101 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); 102 result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]), 103 compressed_buffer, 104 compressed_size, 105 bp->b_bufsize); 106 if (result < 0) { 107 kprintf("READ PATH: Error during decompression." 
108 "bio %016jx/%d\n", 109 (intmax_t)bio->bio_offset, bytes); 110 /* make sure it isn't random garbage */ 111 bzero(compressed_buffer, bp->b_bufsize); 112 } 113 KKASSERT(result <= bp->b_bufsize); 114 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); 115 if (result < bp->b_bufsize) 116 bzero(bp->b_data + result, bp->b_bufsize - result); 117 objcache_put(cache_buffer_read, compressed_buffer); 118 bp->b_resid = 0; 119 bp->b_flags |= B_AGE; 120 } 121 122 /* 123 * Callback used in read path in case that a block is compressed with ZLIB. 124 * It is almost identical to LZ4 callback, so in theory they can be unified, 125 * but we didn't want to make changes in bio structure for that. 126 */ 127 static 128 void 129 hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio) 130 { 131 struct buf *bp; 132 char *compressed_buffer; 133 z_stream strm_decompress; 134 int result; 135 int ret; 136 137 bp = bio->bio_buf; 138 139 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE); 140 strm_decompress.avail_in = 0; 141 strm_decompress.next_in = Z_NULL; 142 143 ret = inflateInit(&strm_decompress); 144 145 if (ret != Z_OK) 146 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n"); 147 148 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); 149 strm_decompress.next_in = __DECONST(char *, data); 150 151 /* XXX supply proper size, subset of device bp */ 152 strm_decompress.avail_in = bytes; 153 strm_decompress.next_out = compressed_buffer; 154 strm_decompress.avail_out = bp->b_bufsize; 155 156 ret = inflate(&strm_decompress, Z_FINISH); 157 if (ret != Z_STREAM_END) { 158 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n"); 159 bzero(compressed_buffer, bp->b_bufsize); 160 } 161 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); 162 result = bp->b_bufsize - strm_decompress.avail_out; 163 if (result < bp->b_bufsize) 164 bzero(bp->b_data + result, strm_decompress.avail_out); 165 objcache_put(cache_buffer_read, compressed_buffer); 166 ret = 
inflateEnd(&strm_decompress); 167 168 bp->b_resid = 0; 169 bp->b_flags |= B_AGE; 170 } 171 172 static __inline 173 void 174 hammer2_knote(struct vnode *vp, int flags) 175 { 176 if (flags) 177 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 178 } 179 180 /* 181 * Last reference to a vnode is going away but it is still cached. 182 */ 183 static 184 int 185 hammer2_vop_inactive(struct vop_inactive_args *ap) 186 { 187 const hammer2_inode_data_t *ripdata; 188 hammer2_inode_t *ip; 189 hammer2_cluster_t *cluster; 190 struct vnode *vp; 191 192 LOCKSTART; 193 vp = ap->a_vp; 194 ip = VTOI(vp); 195 196 /* 197 * Degenerate case 198 */ 199 if (ip == NULL) { 200 vrecycle(vp); 201 LOCKSTOP; 202 return (0); 203 } 204 205 /* 206 * Detect updates to the embedded data which may be synchronized by 207 * the strategy code. Simply mark the inode modified so it gets 208 * picked up by our normal flush. 209 */ 210 cluster = hammer2_inode_lock_ex(ip); 211 KKASSERT(cluster); 212 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 213 214 /* 215 * Check for deleted inodes and recycle immediately. 216 * 217 * WARNING: nvtruncbuf() can only be safely called without the inode 218 * lock held due to the way our write thread works. 219 */ 220 if (ripdata->nlinks == 0) { 221 hammer2_key_t lbase; 222 int nblksize; 223 224 nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL); 225 hammer2_inode_unlock_ex(ip, cluster); 226 nvtruncbuf(vp, 0, nblksize, 0, 0); 227 vrecycle(vp); 228 } else { 229 hammer2_inode_unlock_ex(ip, cluster); 230 } 231 LOCKSTOP; 232 return (0); 233 } 234 235 /* 236 * Reclaim a vnode so that it can be reused; after the inode is 237 * disassociated, the filesystem must manage it alone. 
238 */ 239 static 240 int 241 hammer2_vop_reclaim(struct vop_reclaim_args *ap) 242 { 243 const hammer2_inode_data_t *ripdata; 244 hammer2_cluster_t *cluster; 245 hammer2_inode_t *ip; 246 hammer2_pfsmount_t *pmp; 247 struct vnode *vp; 248 249 LOCKSTART; 250 vp = ap->a_vp; 251 ip = VTOI(vp); 252 if (ip == NULL) { 253 LOCKSTOP; 254 return(0); 255 } 256 257 /* 258 * Inode must be locked for reclaim. 259 */ 260 pmp = ip->pmp; 261 cluster = hammer2_inode_lock_ex(ip); 262 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 263 264 /* 265 * The final close of a deleted file or directory marks it for 266 * destruction. The DELETED flag allows the flusher to shortcut 267 * any modified blocks still unflushed (that is, just ignore them). 268 * 269 * HAMMER2 usually does not try to optimize the freemap by returning 270 * deleted blocks to it as it does not usually know how many snapshots 271 * might be referencing portions of the file/dir. 272 */ 273 vp->v_data = NULL; 274 ip->vp = NULL; 275 276 /* 277 * NOTE! We do not attempt to flush chains here, flushing is 278 * really fragile and could also deadlock. 279 */ 280 vclrisdirty(vp); 281 282 /* 283 * A reclaim can occur at any time so we cannot safely start a 284 * transaction to handle reclamation of unlinked files. Instead, 285 * the ip is left with a reference and placed on a linked list and 286 * handled later on. 
287 */ 288 if (ripdata->nlinks == 0) { 289 hammer2_inode_unlink_t *ipul; 290 291 ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO); 292 ipul->ip = ip; 293 294 spin_lock(&pmp->list_spin); 295 TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry); 296 spin_unlock(&pmp->list_spin); 297 hammer2_inode_unlock_ex(ip, cluster); /* unlock */ 298 /* retain ref from vp for ipul */ 299 } else { 300 hammer2_inode_unlock_ex(ip, cluster); /* unlock */ 301 hammer2_inode_drop(ip); /* vp ref */ 302 } 303 /* cluster no longer referenced */ 304 /* cluster = NULL; not needed */ 305 306 /* 307 * XXX handle background sync when ip dirty, kernel will no longer 308 * notify us regarding this inode because there is no longer a 309 * vnode attached to it. 310 */ 311 312 LOCKSTOP; 313 return (0); 314 } 315 316 static 317 int 318 hammer2_vop_fsync(struct vop_fsync_args *ap) 319 { 320 hammer2_inode_t *ip; 321 hammer2_trans_t trans; 322 hammer2_cluster_t *cluster; 323 struct vnode *vp; 324 325 LOCKSTART; 326 vp = ap->a_vp; 327 ip = VTOI(vp); 328 329 #if 0 330 /* XXX can't do this yet */ 331 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH); 332 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 333 #endif 334 hammer2_trans_init(&trans, ip->pmp, 0); 335 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 336 337 /* 338 * Calling chain_flush here creates a lot of duplicative 339 * COW operations due to non-optimal vnode ordering. 340 * 341 * Only do it for an actual fsync() syscall. The other forms 342 * which call this function will eventually call chain_flush 343 * on the volume root as a catch-all, which is far more optimal. 
344 */ 345 cluster = hammer2_inode_lock_ex(ip); 346 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 347 vclrisdirty(vp); 348 if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME)) 349 hammer2_inode_fsync(&trans, ip, cluster); 350 351 #if 0 352 /* 353 * XXX creates discontinuity w/modify_tid 354 */ 355 if (ap->a_flags & VOP_FSYNC_SYSCALL) { 356 hammer2_flush(&trans, cluster); 357 } 358 #endif 359 hammer2_inode_unlock_ex(ip, cluster); 360 hammer2_trans_done(&trans); 361 362 LOCKSTOP; 363 return (0); 364 } 365 366 static 367 int 368 hammer2_vop_access(struct vop_access_args *ap) 369 { 370 hammer2_inode_t *ip = VTOI(ap->a_vp); 371 const hammer2_inode_data_t *ripdata; 372 hammer2_cluster_t *cluster; 373 uid_t uid; 374 gid_t gid; 375 int error; 376 377 LOCKSTART; 378 cluster = hammer2_inode_lock_sh(ip); 379 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 380 uid = hammer2_to_unix_xid(&ripdata->uid); 381 gid = hammer2_to_unix_xid(&ripdata->gid); 382 error = vop_helper_access(ap, uid, gid, ripdata->mode, ripdata->uflags); 383 hammer2_inode_unlock_sh(ip, cluster); 384 385 LOCKSTOP; 386 return (error); 387 } 388 389 static 390 int 391 hammer2_vop_getattr(struct vop_getattr_args *ap) 392 { 393 const hammer2_inode_data_t *ripdata; 394 hammer2_cluster_t *cluster; 395 hammer2_pfsmount_t *pmp; 396 hammer2_inode_t *ip; 397 struct vnode *vp; 398 struct vattr *vap; 399 400 LOCKSTART; 401 vp = ap->a_vp; 402 vap = ap->a_vap; 403 404 ip = VTOI(vp); 405 pmp = ip->pmp; 406 407 cluster = hammer2_inode_lock_sh(ip); 408 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 409 KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE); 410 411 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 412 vap->va_fileid = ripdata->inum; 413 vap->va_mode = ripdata->mode; 414 vap->va_nlink = ripdata->nlinks; 415 vap->va_uid = hammer2_to_unix_xid(&ripdata->uid); 416 vap->va_gid = hammer2_to_unix_xid(&ripdata->gid); 417 vap->va_rmajor = 0; 418 vap->va_rminor = 0; 419 vap->va_size = 
/*
 * hammer2_vop_setattr
 *
 * Apply the attribute changes requested in ap->a_vap to the inode.
 *
 * Returns EROFS if the PFS is mounted read-only, EPERM if the inode
 * carries IMMUTABLE or APPEND user flags (and va_flags is not being set),
 * EINVAL for a resize of anything other than a regular file, otherwise
 * whatever error the last vop_helper_*() call produced (0 on success).
 *
 * All work is done inside a transaction with the inode locked exclusively.
 * Inode data is modified through hammer2_cluster_modify_ip(); after each
 * such call the read pointer (ripdata) is re-pointed at the writable copy
 * so later checks see the updated values.
 */
static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_trans_t trans;
	struct vnode *vp;
	struct vattr *vap;
	int error;
	int kflags = 0;		/* kqueue notes to post on exit */
	int domtime = 0;	/* flag inode MODIFIED|MTIME on exit */
	int dosync = 0;		/* hammer2_cluster_modsync() is pending */
	uint64_t ctime;

	LOCKSTART;
	vp = ap->a_vp;
	vap = ap->a_vap;
	hammer2_update_time(&ctime);

	ip = VTOI(vp);

	if (ip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, 0);
	cluster = hammer2_inode_lock_ex(ip);
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	error = 0;

	/*
	 * Changing the user flags.  This request is handled on its own:
	 * every path through this branch ends at done, so no other
	 * attribute in vap is examined.
	 */
	if (vap->va_flags != VNOVAL) {
		u_int32_t flags;

		flags = ripdata->uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					 hammer2_to_unix_xid(&ripdata->uid),
					 ap->a_cred);
		if (error == 0) {
			if (ripdata->uflags != flags) {
				wipdata = hammer2_cluster_modify_ip(&trans, ip,
								    cluster, 0);
				wipdata->uflags = flags;
				wipdata->ctime = ctime;
				kflags |= NOTE_ATTRIB;
				dosync = 1;
				ripdata = wipdata;
			}
			if (ripdata->uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}

	/*
	 * No other attribute may be changed while the inode is flagged
	 * immutable or append-only.
	 */
	if (ripdata->uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Ownership change.  vop_helper_chown() updates cur_uid, cur_gid
	 * and cur_mode in place; the inode is only modified if one of the
	 * resulting values differs from what is stored.
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ripdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer2_guid_to_uuid(&uuid_uid, cur_uid);
			hammer2_guid_to_uuid(&uuid_gid, cur_gid);
			if (bcmp(&uuid_uid, &ripdata->uid, sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ripdata->gid, sizeof(uuid_gid)) ||
			    ripdata->mode != cur_mode
			) {
				wipdata = hammer2_cluster_modify_ip(&trans, ip,
								    cluster, 0);
				wipdata->uid = uuid_uid;
				wipdata->gid = uuid_gid;
				wipdata->mode = cur_mode;
				wipdata->ctime = ctime;
				dosync = 1;
				ripdata = wipdata;
			}
			kflags |= NOTE_ATTRIB;
		}
	}

	/*
	 * Resize the file.  Only regular files can be resized.
	 *
	 * The truncate/extend helpers require the inode to be unlocked,
	 * so the lock is dropped around the call and the inode data
	 * pointer is reloaded once the lock has been reacquired.
	 */
	if (vap->va_size != VNOVAL && ip->size != vap->va_size) {
		switch(vp->v_type) {
		case VREG:
			if (vap->va_size == ip->size)
				break;
			hammer2_inode_unlock_ex(ip, cluster);
			if (vap->va_size < ip->size) {
				hammer2_truncate_file(ip, vap->va_size);
			} else {
				hammer2_extend_file(ip, vap->va_size);
			}
			cluster = hammer2_inode_lock_ex(ip);
			/* RELOAD */
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			domtime = 1;
			break;
		default:
			error = EINVAL;
			goto done;
		}
	}
#if 0
	/* atime not supported */
	if (vap->va_atime.tv_sec != VNOVAL) {
		wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
		wipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
		kflags |= NOTE_ATTRIB;
		dosync = 1;
		ripdata = wipdata;
	}
#endif
	/*
	 * Explicit mtime.  This overrides the implicit mtime update a
	 * resize would otherwise request via domtime.
	 */
	if (vap->va_mtime.tv_sec != VNOVAL) {
		wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
		wipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
		kflags |= NOTE_ATTRIB;
		domtime = 0;
		dosync = 1;
		ripdata = wipdata;
	}

	/*
	 * Mode change.  Note that the result of vop_helper_chmod()
	 * replaces any error left behind by the ownership change above.
	 */
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ripdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0 && ripdata->mode != cur_mode) {
			wipdata = hammer2_cluster_modify_ip(&trans, ip,
							    cluster, 0);
			wipdata->mode = cur_mode;
			wipdata->ctime = ctime;
			kflags |= NOTE_ATTRIB;
			dosync = 1;
			ripdata = wipdata;
		}
	}

	/*
	 * If a truncation occurred we must call inode_fsync() now in order
	 * to trim the related data chains, otherwise a later expansion can
	 * cause havoc.
	 *
	 * dosync is cleared here so the modsync is not repeated at done.
	 */
	if (dosync) {
		hammer2_cluster_modsync(cluster);
		dosync = 0;
	}
	hammer2_inode_fsync(&trans, ip, cluster);

	/*
	 * Cleanup.  If domtime is set an additional inode modification
	 * must be flagged.  All other modifications will have already
	 * set INODE_MODIFIED and called vsetisdirty().
	 *
	 * Early exits arrive here with dosync possibly still set, in
	 * which case the modsync is issued before the inode is unlocked.
	 */
done:
	if (domtime) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
					   HAMMER2_INODE_MTIME);
		vsetisdirty(ip->vp);
	}
	if (dosync)
		hammer2_cluster_modsync(cluster);
	hammer2_inode_unlock_ex(ip, cluster);
	hammer2_trans_done(&trans);
	hammer2_knote(ip->vp, kflags);

	LOCKSTOP;
	return (error);
}
/*
 * hammer2_vop_readdir
 *
 * Copy directory entries into ap->a_uio starting at uio->uio_offset.
 * Offsets 0 and 1 produce the artificial "." and ".." entries; anything
 * beyond that is a scan of the directory's children by key.
 *
 * If the caller supplied ap->a_ncookies, an array with one seek cookie
 * per returned entry is allocated from M_TEMP and handed back through
 * ap->a_cookies / ap->a_ncookies.  ap->a_eofflag, when supplied, is set
 * to non-zero once the scan ran out of entries.
 *
 * Returns the error reported through vop_write_dirent(), 0 otherwise.
 */
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_t *ip;
	hammer2_inode_t *xip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *xcluster;
	hammer2_blockref_t bref;
	hammer2_tid_t inum;
	hammer2_key_t key_next;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int ncookies;
	int error;
	int dtype;
	int ddflag;
	int r;

	LOCKSTART;
	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	/*
	 * Set up the directory entry cookie array if requested.  It is
	 * sized from the residual byte count and capped at 1024 entries.
	 * ncookies is -1 when no cookies were requested, so the
	 * "cookie_index == ncookies" limit checks below never trigger.
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	cparent = hammer2_inode_lock_sh(ip);
	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	error = 0;
	cluster = (void *)(intptr_t)-1;	/* non-NULL for early goto done case */

	if (saveoff == 0) {
		inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		/*
		 * Be careful with lockorder when accessing ".."
		 *
		 * (ip is the current dir. xip is the parent dir).
		 *
		 * The parent is referenced, ip is unlocked, then the
		 * parent and ip are locked in that order.  If ip->pip
		 * changed while ip was unlocked the sequence is retried.
		 * inum defaults to ip's own inode number, which is what
		 * gets reported when the loop body never runs.
		 */
		inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
		while (ip->pip != NULL && ip != ip->pmp->iroot) {
			xip = ip->pip;
			hammer2_inode_ref(xip);
			hammer2_inode_unlock_sh(ip, cparent);
			xcluster = hammer2_inode_lock_sh(xip);
			cparent = hammer2_inode_lock_sh(ip);
			hammer2_inode_drop(xip);
			ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
			if (xip == ip->pip) {
				inum = hammer2_cluster_rdata(xcluster)->
					ipdata.inum & HAMMER2_DIRHASH_USERMSK;
				hammer2_inode_unlock_sh(xip, xcluster);
				break;
			}
			hammer2_inode_unlock_sh(xip, xcluster);
		}
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: lkey %016jx\n", lkey);

	/*
	 * parent is the inode cluster, already locked for us.  Don't
	 * double lock shared locks as this will screw up upgrades.
	 */
	if (error) {
		goto done;
	}

	/*
	 * Try an exact match on the resume key first, then fall back to
	 * the first entry at or beyond it.
	 */
	cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey,
				     HAMMER2_LOOKUP_SHARED, &ddflag);
	if (cluster == NULL) {
		cluster = hammer2_cluster_lookup(cparent, &key_next,
					     lkey, (hammer2_key_t)-1,
					     HAMMER2_LOOKUP_SHARED, &ddflag);
	}
	if (cluster)
		hammer2_cluster_bref(cluster, &bref);
	while (cluster) {
		if (hammer2_debug & 0x0020)
			kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n",
				cparent->focus, cluster->focus,
				bref.key, key_next);

		if (bref.type == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			dtype = hammer2_get_dtype(ripdata);
			saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
			r = vop_write_dirent(&error, uio,
					     ripdata->inum &
					      HAMMER2_DIRHASH_USERMSK,
					     dtype,
					     ripdata->name_len,
					     ripdata->filename);
			/*
			 * On a non-zero return leave saveoff at this
			 * entry's key and stop scanning.
			 */
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else {
			/* XXX chain error */
			kprintf("bad chain type readdir %d\n", bref.type);
		}

		/*
		 * Keys may not be returned in order so once we have a
		 * placemarker (cluster) the scan must allow the full range
		 * or some entries will be missed.
		 */
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next, (hammer2_key_t)-1,
					       HAMMER2_LOOKUP_SHARED);
		if (cluster) {
			hammer2_cluster_bref(cluster, &bref);
			saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1;
		} else {
			saveoff = (hammer2_key_t)-1;
		}
		if (cookie_index == ncookies)
			break;
	}
	if (cluster)
		hammer2_cluster_unlock(cluster);
done:
	hammer2_inode_unlock_sh(ip, cparent);

	/*
	 * cluster is only compared against NULL from here on; it is NULL
	 * only when the scan ran out of entries.
	 */
	if (ap->a_eofflag)
		*ap->a_eofflag = (cluster == NULL);
	if (hammer2_debug & 0x0020)
		kprintf("readdir: done at %016jx\n", saveoff);
	uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;

	/*
	 * The cookie array is only freed here when an error occurred
	 * before any entry was returned; otherwise it is handed to the
	 * caller.
	 */
	if (error && cookie_index == 0) {
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	LOCKSTOP;
	return (error);
}
780 */ 781 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 782 key_next, (hammer2_key_t)-1, 783 HAMMER2_LOOKUP_SHARED); 784 if (cluster) { 785 hammer2_cluster_bref(cluster, &bref); 786 saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1; 787 } else { 788 saveoff = (hammer2_key_t)-1; 789 } 790 if (cookie_index == ncookies) 791 break; 792 } 793 if (cluster) 794 hammer2_cluster_unlock(cluster); 795 done: 796 hammer2_inode_unlock_sh(ip, cparent); 797 if (ap->a_eofflag) 798 *ap->a_eofflag = (cluster == NULL); 799 if (hammer2_debug & 0x0020) 800 kprintf("readdir: done at %016jx\n", saveoff); 801 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 802 if (error && cookie_index == 0) { 803 if (cookies) { 804 kfree(cookies, M_TEMP); 805 *ap->a_ncookies = 0; 806 *ap->a_cookies = NULL; 807 } 808 } else { 809 if (cookies) { 810 *ap->a_ncookies = cookie_index; 811 *ap->a_cookies = cookies; 812 } 813 } 814 LOCKSTOP; 815 return (error); 816 } 817 818 /* 819 * hammer2_vop_readlink { vp, uio, cred } 820 */ 821 static 822 int 823 hammer2_vop_readlink(struct vop_readlink_args *ap) 824 { 825 struct vnode *vp; 826 hammer2_inode_t *ip; 827 int error; 828 829 vp = ap->a_vp; 830 if (vp->v_type != VLNK) 831 return (EINVAL); 832 ip = VTOI(vp); 833 834 error = hammer2_read_file(ip, ap->a_uio, 0); 835 return (error); 836 } 837 838 static 839 int 840 hammer2_vop_read(struct vop_read_args *ap) 841 { 842 struct vnode *vp; 843 hammer2_inode_t *ip; 844 struct uio *uio; 845 int error; 846 int seqcount; 847 int bigread; 848 849 /* 850 * Read operations supported on this vnode? 
851 */ 852 vp = ap->a_vp; 853 if (vp->v_type != VREG) 854 return (EINVAL); 855 856 /* 857 * Misc 858 */ 859 ip = VTOI(vp); 860 uio = ap->a_uio; 861 error = 0; 862 863 seqcount = ap->a_ioflag >> 16; 864 bigread = (uio->uio_resid > 100 * 1024 * 1024); 865 866 error = hammer2_read_file(ip, uio, seqcount); 867 return (error); 868 } 869 870 static 871 int 872 hammer2_vop_write(struct vop_write_args *ap) 873 { 874 hammer2_inode_t *ip; 875 hammer2_trans_t trans; 876 thread_t td; 877 struct vnode *vp; 878 struct uio *uio; 879 int error; 880 int seqcount; 881 int bigwrite; 882 883 /* 884 * Read operations supported on this vnode? 885 */ 886 vp = ap->a_vp; 887 if (vp->v_type != VREG) 888 return (EINVAL); 889 890 /* 891 * Misc 892 */ 893 ip = VTOI(vp); 894 uio = ap->a_uio; 895 error = 0; 896 if (ip->pmp->ronly) { 897 return (EROFS); 898 } 899 900 seqcount = ap->a_ioflag >> 16; 901 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 902 903 /* 904 * Check resource limit 905 */ 906 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 907 uio->uio_offset + uio->uio_resid > 908 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 909 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 910 return (EFBIG); 911 } 912 913 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 914 915 /* 916 * The transaction interlocks against flushes initiations 917 * (note: but will run concurrently with the actual flush). 918 */ 919 hammer2_trans_init(&trans, ip->pmp, 0); 920 error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount); 921 hammer2_trans_done(&trans); 922 923 return (error); 924 } 925 926 /* 927 * Perform read operations on a file or symlink given an UNLOCKED 928 * inode and uio. 929 * 930 * The passed ip is not locked. 931 */ 932 static 933 int 934 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 935 { 936 hammer2_off_t size; 937 struct buf *bp; 938 int error; 939 940 error = 0; 941 942 /* 943 * UIO read loop. 
/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 *
 * ip       - inode being written.
 * uio      - source data; with IO_APPEND the offset is first moved to
 *	      the current end of file.
 * ioflag   - IO_* flags; selects append handling and how each buffer is
 *	      written out (bwrite/bawrite/bdwrite).
 * seqcount - not referenced by this function.
 *
 * Returns 0 on success or the first error from bread() or uiomove().
 * If the file was extended for this write and an error occurred, it is
 * truncated back to its original size.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip,
		   struct uio *uio, int ioflag, int seqcount)
{
	hammer2_key_t old_eof;
	hammer2_key_t new_eof;
	struct buf *bp;
	int kflags;
	int error;
	int modified;

	/*
	 * Setup if append
	 */
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	if (ioflag & IO_APPEND)
		uio->uio_offset = ip->size;
	old_eof = ip->size;
	ccms_thread_unlock(&ip->topo_cst);

	/*
	 * Extend the file if necessary.  If the write fails at some point
	 * we will truncate it back down to cover as much as we were able
	 * to write.
	 *
	 * Doing this now makes it easier to calculate buffer sizes in
	 * the loop.
	 */
	kflags = 0;
	error = 0;
	modified = 0;

	if (uio->uio_offset + uio->uio_resid > old_eof) {
		new_eof = uio->uio_offset + uio->uio_resid;
		modified = 1;
		hammer2_extend_file(ip, new_eof);
		kflags |= NOTE_EXTEND;
	} else {
		new_eof = old_eof;
	}

	/*
	 * UIO write loop
	 */
	while (uio->uio_resid > 0) {
		hammer2_key_t lbase;
		int trivial;
		int endofblk;
		int lblksize;
		int loff;
		int n;

		/*
		 * Don't allow the buffer build to blow out the buffer
		 * cache.
		 */
		if ((ioflag & IO_RECURSE) == 0)
			bwillwrite(HAMMER2_PBUFSIZE);

		/*
		 * This nominally tells us how much we can cluster and
		 * what the logical buffer size needs to be.  Currently
		 * we don't try to cluster the write and just handle one
		 * block at a time.
		 */
		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, NULL);
		loff = (int)(uio->uio_offset - lbase);

		KKASSERT(lblksize <= 65536);

		/*
		 * Calculate bytes to copy this transfer and whether the
		 * copy completely covers the buffer or not.
		 *
		 * trivial  - the buffer can be instantiated without first
		 *	      reading its current contents.
		 * endofblk - the copy runs through the end of the block.
		 *
		 * NOTE(review): loff is the offset of the write within
		 * the block while lbase is the base offset passed to
		 * getblk()/bread(), so "loff == lbase" looks like it can
		 * only hold when both are zero.  Possibly "loff == 0"
		 * was intended; confirm before changing.
		 */
		trivial = 0;
		n = lblksize - loff;
		if (n > uio->uio_resid) {
			n = uio->uio_resid;
			if (loff == lbase && uio->uio_offset + n == new_eof)
				trivial = 1;
			endofblk = 0;
		} else {
			if (loff == 0)
				trivial = 1;
			endofblk = 1;
		}

		/*
		 * Get the buffer
		 */
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ip->vp, lbase, lblksize, &bp);
			}
		} else if (trivial) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 *
			 * (The strategy code will detect zero-fill physical
			 * blocks for this case).
			 */
			error = bread(ip->vp, lbase, lblksize, &bp);
			if (error == 0)
				bheavy(bp);
		}

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Ok, copy the data in.  The write note and the modified
		 * flag are set even if the copy itself fails.
		 */
		error = uiomove(bp->b_data + loff, n, uio);
		kflags |= NOTE_WRITE;
		modified = 1;
		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
		 *	    with IO_SYNC or IO_ASYNC set.  These writes
		 *	    must be handled as the pageout daemon expects.
		 */
		if (ioflag & IO_SYNC) {
			bwrite(bp);
		} else if ((ioflag & IO_DIRECT) && endofblk) {
			bawrite(bp);
		} else if (ioflag & IO_ASYNC) {
			bawrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/*
	 * Cleanup.  If we extended the file EOF but failed to write through
	 * the entire write is a failure and we have to back-up.  Note that
	 * the file is cut back to old_eof, not to the last byte written.
	 *
	 * Otherwise, if anything was modified, update the in-memory mtime
	 * and flag it for synchronization.
	 */
	if (error && new_eof != old_eof) {
		hammer2_truncate_file(ip, old_eof);
	} else if (modified) {
		ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
		hammer2_update_time(&ip->mtime);
		atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME);
		ccms_thread_unlock(&ip->topo_cst);
	}

	/* unconditional: the inode is marked modified and dirty on every call */
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	hammer2_knote(ip->vp, kflags);
	vsetisdirty(ip->vp);

	return error;
}
1146 */ 1147 if (error && new_eof != old_eof) { 1148 hammer2_truncate_file(ip, old_eof); 1149 } else if (modified) { 1150 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1151 hammer2_update_time(&ip->mtime); 1152 atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME); 1153 ccms_thread_unlock(&ip->topo_cst); 1154 } 1155 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 1156 hammer2_knote(ip->vp, kflags); 1157 vsetisdirty(ip->vp); 1158 1159 return error; 1160 } 1161 1162 /* 1163 * Truncate the size of a file. The inode must not be locked. 1164 * 1165 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED 1166 * 1167 * WARNING: nvtruncbuf() can only be safely called without the inode lock 1168 * held due to the way our write thread works. 1169 */ 1170 static 1171 void 1172 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1173 { 1174 hammer2_key_t lbase; 1175 int nblksize; 1176 1177 LOCKSTART; 1178 if (ip->vp) { 1179 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1180 nvtruncbuf(ip->vp, nsize, 1181 nblksize, (int)nsize & (nblksize - 1), 1182 0); 1183 } 1184 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1185 ip->size = nsize; 1186 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1187 ccms_thread_unlock(&ip->topo_cst); 1188 LOCKSTOP; 1189 } 1190 1191 /* 1192 * Extend the size of a file. The inode must not be locked. 
1193 * 1194 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED 1195 */ 1196 static 1197 void 1198 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1199 { 1200 hammer2_key_t lbase; 1201 hammer2_key_t osize; 1202 int oblksize; 1203 int nblksize; 1204 1205 LOCKSTART; 1206 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 1207 osize = ip->size; 1208 ip->size = nsize; 1209 ccms_thread_unlock(&ip->topo_cst); 1210 1211 if (ip->vp) { 1212 oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL); 1213 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1214 nvextendbuf(ip->vp, 1215 osize, nsize, 1216 oblksize, nblksize, 1217 -1, -1, 0); 1218 } 1219 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1220 LOCKSTOP; 1221 } 1222 1223 static 1224 int 1225 hammer2_vop_nresolve(struct vop_nresolve_args *ap) 1226 { 1227 hammer2_inode_t *ip; 1228 hammer2_inode_t *dip; 1229 hammer2_cluster_t *cparent; 1230 hammer2_cluster_t *cluster; 1231 const hammer2_inode_data_t *ripdata; 1232 hammer2_key_t key_next; 1233 hammer2_key_t lhc; 1234 struct namecache *ncp; 1235 const uint8_t *name; 1236 size_t name_len; 1237 int error = 0; 1238 int ddflag; 1239 struct vnode *vp; 1240 1241 LOCKSTART; 1242 dip = VTOI(ap->a_dvp); 1243 ncp = ap->a_nch->ncp; 1244 name = ncp->nc_name; 1245 name_len = ncp->nc_nlen; 1246 lhc = hammer2_dirhash(name, name_len); 1247 1248 /* 1249 * Note: In DragonFly the kernel handles '.' and '..'. 
1250 */ 1251 cparent = hammer2_inode_lock_sh(dip); 1252 cluster = hammer2_cluster_lookup(cparent, &key_next, 1253 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1254 HAMMER2_LOOKUP_SHARED, &ddflag); 1255 while (cluster) { 1256 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { 1257 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1258 if (ripdata->name_len == name_len && 1259 bcmp(ripdata->filename, name, name_len) == 0) { 1260 break; 1261 } 1262 } 1263 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 1264 key_next, 1265 lhc + HAMMER2_DIRHASH_LOMASK, 1266 HAMMER2_LOOKUP_SHARED); 1267 } 1268 hammer2_inode_unlock_sh(dip, cparent); 1269 1270 /* 1271 * Resolve hardlink entries before acquiring the inode. 1272 */ 1273 if (cluster) { 1274 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1275 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) { 1276 hammer2_tid_t inum = ripdata->inum; 1277 error = hammer2_hardlink_find(dip, NULL, cluster); 1278 if (error) { 1279 kprintf("hammer2: unable to find hardlink " 1280 "0x%016jx\n", inum); 1281 hammer2_cluster_unlock(cluster); 1282 LOCKSTOP; 1283 return error; 1284 } 1285 } 1286 } 1287 1288 /* 1289 * nresolve needs to resolve hardlinks, the original cluster is not 1290 * sufficient. 1291 */ 1292 if (cluster) { 1293 ip = hammer2_inode_get(dip->pmp, dip, cluster); 1294 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1295 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) { 1296 kprintf("nresolve: fixup hardlink\n"); 1297 hammer2_inode_ref(ip); 1298 hammer2_inode_unlock_ex(ip, NULL); 1299 hammer2_cluster_unlock(cluster); 1300 cluster = hammer2_inode_lock_ex(ip); 1301 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1302 hammer2_inode_drop(ip); 1303 kprintf("nresolve: fixup to type %02x\n", 1304 ripdata->type); 1305 } 1306 } else { 1307 ip = NULL; 1308 } 1309 1310 #if 0 1311 /* 1312 * Deconsolidate any hardlink whos nlinks == 1. Ignore errors. 1313 * If an error occurs chain and ip are left alone. 
1314 * 1315 * XXX upgrade shared lock? 1316 */ 1317 if (ochain && chain && 1318 chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) { 1319 kprintf("hammer2: need to unconsolidate hardlink for %s\n", 1320 chain->data->ipdata.filename); 1321 /* XXX retain shared lock on dip? (currently not held) */ 1322 hammer2_trans_init(&trans, dip->pmp, 0); 1323 hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain); 1324 hammer2_trans_done(&trans); 1325 } 1326 #endif 1327 1328 /* 1329 * Acquire the related vnode 1330 * 1331 * NOTE: For error processing, only ENOENT resolves the namecache 1332 * entry to NULL, otherwise we just return the error and 1333 * leave the namecache unresolved. 1334 * 1335 * NOTE: multiple hammer2_inode structures can be aliased to the 1336 * same chain element, for example for hardlinks. This 1337 * use case does not 'reattach' inode associations that 1338 * might already exist, but always allocates a new one. 1339 * 1340 * WARNING: inode structure is locked exclusively via inode_get 1341 * but chain was locked shared. inode_unlock_ex() 1342 * will handle it properly. 1343 */ 1344 if (cluster) { 1345 vp = hammer2_igetv(ip, cluster, &error); 1346 if (error == 0) { 1347 vn_unlock(vp); 1348 cache_setvp(ap->a_nch, vp); 1349 } else if (error == ENOENT) { 1350 cache_setvp(ap->a_nch, NULL); 1351 } 1352 hammer2_inode_unlock_ex(ip, cluster); 1353 1354 /* 1355 * The vp should not be released until after we've disposed 1356 * of our locks, because it might cause vop_inactive() to 1357 * be called. 
1358 */ 1359 if (vp) 1360 vrele(vp); 1361 } else { 1362 error = ENOENT; 1363 cache_setvp(ap->a_nch, NULL); 1364 } 1365 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1366 ("resolve error %d/%p ap %p\n", 1367 error, ap->a_nch->ncp->nc_vp, ap)); 1368 LOCKSTOP; 1369 return error; 1370 } 1371 1372 static 1373 int 1374 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1375 { 1376 hammer2_inode_t *dip; 1377 hammer2_inode_t *ip; 1378 hammer2_cluster_t *cparent; 1379 int error; 1380 1381 LOCKSTART; 1382 dip = VTOI(ap->a_dvp); 1383 1384 if ((ip = dip->pip) == NULL) { 1385 *ap->a_vpp = NULL; 1386 LOCKSTOP; 1387 return ENOENT; 1388 } 1389 cparent = hammer2_inode_lock_ex(ip); 1390 *ap->a_vpp = hammer2_igetv(ip, cparent, &error); 1391 hammer2_inode_unlock_ex(ip, cparent); 1392 1393 LOCKSTOP; 1394 return error; 1395 } 1396 1397 static 1398 int 1399 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1400 { 1401 hammer2_inode_t *dip; 1402 hammer2_inode_t *nip; 1403 hammer2_trans_t trans; 1404 hammer2_cluster_t *cluster; 1405 struct namecache *ncp; 1406 const uint8_t *name; 1407 size_t name_len; 1408 int error; 1409 1410 LOCKSTART; 1411 dip = VTOI(ap->a_dvp); 1412 if (dip->pmp->ronly) { 1413 LOCKSTOP; 1414 return (EROFS); 1415 } 1416 1417 ncp = ap->a_nch->ncp; 1418 name = ncp->nc_name; 1419 name_len = ncp->nc_nlen; 1420 cluster = NULL; 1421 1422 hammer2_pfs_memory_wait(dip->pmp); 1423 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1424 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1425 name, name_len, &cluster, &error); 1426 if (error) { 1427 KKASSERT(nip == NULL); 1428 *ap->a_vpp = NULL; 1429 } else { 1430 *ap->a_vpp = hammer2_igetv(nip, cluster, &error); 1431 hammer2_inode_unlock_ex(nip, cluster); 1432 } 1433 hammer2_trans_done(&trans); 1434 1435 if (error == 0) { 1436 cache_setunresolved(ap->a_nch); 1437 cache_setvp(ap->a_nch, *ap->a_vpp); 1438 } 1439 LOCKSTOP; 1440 return error; 1441 } 1442 1443 /* 1444 * Return the largest contiguous 
physical disk range for the logical 1445 * request, in bytes. 1446 * 1447 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1448 * 1449 * Basically disabled, the logical buffer write thread has to deal with 1450 * buffers one-at-a-time. 1451 */ 1452 static 1453 int 1454 hammer2_vop_bmap(struct vop_bmap_args *ap) 1455 { 1456 *ap->a_doffsetp = NOOFFSET; 1457 if (ap->a_runp) 1458 *ap->a_runp = 0; 1459 if (ap->a_runb) 1460 *ap->a_runb = 0; 1461 return (EOPNOTSUPP); 1462 } 1463 1464 static 1465 int 1466 hammer2_vop_open(struct vop_open_args *ap) 1467 { 1468 return vop_stdopen(ap); 1469 } 1470 1471 /* 1472 * hammer2_vop_advlock { vp, id, op, fl, flags } 1473 */ 1474 static 1475 int 1476 hammer2_vop_advlock(struct vop_advlock_args *ap) 1477 { 1478 hammer2_inode_t *ip = VTOI(ap->a_vp); 1479 const hammer2_inode_data_t *ripdata; 1480 hammer2_cluster_t *cparent; 1481 hammer2_off_t size; 1482 1483 cparent = hammer2_inode_lock_sh(ip); 1484 ripdata = &hammer2_cluster_rdata(cparent)->ipdata; 1485 size = ripdata->size; 1486 hammer2_inode_unlock_sh(ip, cparent); 1487 return (lf_advlock(ap, &ip->advlock, size)); 1488 } 1489 1490 1491 static 1492 int 1493 hammer2_vop_close(struct vop_close_args *ap) 1494 { 1495 return vop_stdclose(ap); 1496 } 1497 1498 /* 1499 * hammer2_vop_nlink { nch, dvp, vp, cred } 1500 * 1501 * Create a hardlink from (vp) to {dvp, nch}. 
1502 */ 1503 static 1504 int 1505 hammer2_vop_nlink(struct vop_nlink_args *ap) 1506 { 1507 hammer2_inode_t *fdip; /* target directory to create link in */ 1508 hammer2_inode_t *tdip; /* target directory to create link in */ 1509 hammer2_inode_t *cdip; /* common parent directory */ 1510 hammer2_inode_t *ip; /* inode we are hardlinking to */ 1511 hammer2_cluster_t *cluster; 1512 hammer2_cluster_t *fdcluster; 1513 hammer2_cluster_t *tdcluster; 1514 hammer2_cluster_t *cdcluster; 1515 hammer2_trans_t trans; 1516 struct namecache *ncp; 1517 const uint8_t *name; 1518 size_t name_len; 1519 int error; 1520 1521 LOCKSTART; 1522 tdip = VTOI(ap->a_dvp); 1523 if (tdip->pmp->ronly) { 1524 LOCKSTOP; 1525 return (EROFS); 1526 } 1527 1528 ncp = ap->a_nch->ncp; 1529 name = ncp->nc_name; 1530 name_len = ncp->nc_nlen; 1531 1532 /* 1533 * ip represents the file being hardlinked. The file could be a 1534 * normal file or a hardlink target if it has already been hardlinked. 1535 * If ip is a hardlinked target then ip->pip represents the location 1536 * of the hardlinked target, NOT the location of the hardlink pointer. 1537 * 1538 * Bump nlinks and potentially also create or move the hardlink 1539 * target in the parent directory common to (ip) and (tdip). The 1540 * consolidation code can modify ip->cluster and ip->pip. The 1541 * returned cluster is locked. 1542 */ 1543 ip = VTOI(ap->a_vp); 1544 hammer2_pfs_memory_wait(ip->pmp); 1545 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_NEWINODE); 1546 1547 /* 1548 * The common parent directory must be locked first to avoid deadlocks. 1549 * Also note that fdip and/or tdip might match cdip. 
1550 */ 1551 fdip = ip->pip; 1552 cdip = hammer2_inode_common_parent(fdip, tdip); 1553 cdcluster = hammer2_inode_lock_ex(cdip); 1554 fdcluster = hammer2_inode_lock_ex(fdip); 1555 tdcluster = hammer2_inode_lock_ex(tdip); 1556 cluster = hammer2_inode_lock_ex(ip); 1557 error = hammer2_hardlink_consolidate(&trans, ip, &cluster, 1558 cdip, cdcluster, 1); 1559 if (error) 1560 goto done; 1561 1562 /* 1563 * Create a directory entry connected to the specified cluster. 1564 * 1565 * WARNING! chain can get moved by the connect (indirectly due to 1566 * potential indirect block creation). 1567 */ 1568 error = hammer2_inode_connect(&trans, &cluster, 1, 1569 tdip, tdcluster, 1570 name, name_len, 0); 1571 if (error == 0) { 1572 cache_setunresolved(ap->a_nch); 1573 cache_setvp(ap->a_nch, ap->a_vp); 1574 } 1575 done: 1576 hammer2_inode_unlock_ex(ip, cluster); 1577 hammer2_inode_unlock_ex(tdip, tdcluster); 1578 hammer2_inode_unlock_ex(fdip, fdcluster); 1579 hammer2_inode_unlock_ex(cdip, cdcluster); 1580 hammer2_inode_drop(cdip); 1581 hammer2_trans_done(&trans); 1582 1583 LOCKSTOP; 1584 return error; 1585 } 1586 1587 /* 1588 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap } 1589 * 1590 * The operating system has already ensured that the directory entry 1591 * does not exist and done all appropriate namespace locking. 
1592 */ 1593 static 1594 int 1595 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1596 { 1597 hammer2_inode_t *dip; 1598 hammer2_inode_t *nip; 1599 hammer2_trans_t trans; 1600 hammer2_cluster_t *ncluster; 1601 struct namecache *ncp; 1602 const uint8_t *name; 1603 size_t name_len; 1604 int error; 1605 1606 LOCKSTART; 1607 dip = VTOI(ap->a_dvp); 1608 if (dip->pmp->ronly) { 1609 LOCKSTOP; 1610 return (EROFS); 1611 } 1612 1613 ncp = ap->a_nch->ncp; 1614 name = ncp->nc_name; 1615 name_len = ncp->nc_nlen; 1616 hammer2_pfs_memory_wait(dip->pmp); 1617 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1618 ncluster = NULL; 1619 1620 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1621 name, name_len, &ncluster, &error); 1622 if (error) { 1623 KKASSERT(nip == NULL); 1624 *ap->a_vpp = NULL; 1625 } else { 1626 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error); 1627 hammer2_inode_unlock_ex(nip, ncluster); 1628 } 1629 hammer2_trans_done(&trans); 1630 1631 if (error == 0) { 1632 cache_setunresolved(ap->a_nch); 1633 cache_setvp(ap->a_nch, *ap->a_vpp); 1634 } 1635 LOCKSTOP; 1636 return error; 1637 } 1638 1639 /* 1640 * Make a device node (typically a fifo) 1641 */ 1642 static 1643 int 1644 hammer2_vop_nmknod(struct vop_nmknod_args *ap) 1645 { 1646 hammer2_inode_t *dip; 1647 hammer2_inode_t *nip; 1648 hammer2_trans_t trans; 1649 hammer2_cluster_t *ncluster; 1650 struct namecache *ncp; 1651 const uint8_t *name; 1652 size_t name_len; 1653 int error; 1654 1655 LOCKSTART; 1656 dip = VTOI(ap->a_dvp); 1657 if (dip->pmp->ronly) { 1658 LOCKSTOP; 1659 return (EROFS); 1660 } 1661 1662 ncp = ap->a_nch->ncp; 1663 name = ncp->nc_name; 1664 name_len = ncp->nc_nlen; 1665 hammer2_pfs_memory_wait(dip->pmp); 1666 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1667 ncluster = NULL; 1668 1669 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1670 name, name_len, &ncluster, &error); 1671 if (error) { 1672 KKASSERT(nip == NULL); 1673 *ap->a_vpp = 
NULL; 1674 } else { 1675 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error); 1676 hammer2_inode_unlock_ex(nip, ncluster); 1677 } 1678 hammer2_trans_done(&trans); 1679 1680 if (error == 0) { 1681 cache_setunresolved(ap->a_nch); 1682 cache_setvp(ap->a_nch, *ap->a_vpp); 1683 } 1684 LOCKSTOP; 1685 return error; 1686 } 1687 1688 /* 1689 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1690 */ 1691 static 1692 int 1693 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1694 { 1695 hammer2_inode_t *dip; 1696 hammer2_inode_t *nip; 1697 hammer2_cluster_t *ncparent; 1698 hammer2_trans_t trans; 1699 struct namecache *ncp; 1700 const uint8_t *name; 1701 size_t name_len; 1702 int error; 1703 1704 dip = VTOI(ap->a_dvp); 1705 if (dip->pmp->ronly) 1706 return (EROFS); 1707 1708 ncp = ap->a_nch->ncp; 1709 name = ncp->nc_name; 1710 name_len = ncp->nc_nlen; 1711 hammer2_pfs_memory_wait(dip->pmp); 1712 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1713 ncparent = NULL; 1714 1715 ap->a_vap->va_type = VLNK; /* enforce type */ 1716 1717 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1718 name, name_len, &ncparent, &error); 1719 if (error) { 1720 KKASSERT(nip == NULL); 1721 *ap->a_vpp = NULL; 1722 hammer2_trans_done(&trans); 1723 return error; 1724 } 1725 *ap->a_vpp = hammer2_igetv(nip, ncparent, &error); 1726 1727 /* 1728 * Build the softlink (~like file data) and finalize the namecache. 
1729 */ 1730 if (error == 0) { 1731 size_t bytes; 1732 struct uio auio; 1733 struct iovec aiov; 1734 hammer2_inode_data_t *nipdata; 1735 1736 nipdata = &hammer2_cluster_wdata(ncparent)->ipdata; 1737 /* nipdata = &nip->chain->data->ipdata;XXX */ 1738 bytes = strlen(ap->a_target); 1739 1740 if (bytes <= HAMMER2_EMBEDDED_BYTES) { 1741 KKASSERT(nipdata->op_flags & 1742 HAMMER2_OPFLAG_DIRECTDATA); 1743 bcopy(ap->a_target, nipdata->u.data, bytes); 1744 nipdata->size = bytes; 1745 nip->size = bytes; 1746 hammer2_cluster_modsync(ncparent); 1747 hammer2_inode_unlock_ex(nip, ncparent); 1748 /* nipdata = NULL; not needed */ 1749 } else { 1750 hammer2_inode_unlock_ex(nip, ncparent); 1751 /* nipdata = NULL; not needed */ 1752 bzero(&auio, sizeof(auio)); 1753 bzero(&aiov, sizeof(aiov)); 1754 auio.uio_iov = &aiov; 1755 auio.uio_segflg = UIO_SYSSPACE; 1756 auio.uio_rw = UIO_WRITE; 1757 auio.uio_resid = bytes; 1758 auio.uio_iovcnt = 1; 1759 auio.uio_td = curthread; 1760 aiov.iov_base = ap->a_target; 1761 aiov.iov_len = bytes; 1762 error = hammer2_write_file(nip, &auio, IO_APPEND, 0); 1763 /* XXX handle error */ 1764 error = 0; 1765 } 1766 } else { 1767 hammer2_inode_unlock_ex(nip, ncparent); 1768 } 1769 hammer2_trans_done(&trans); 1770 1771 /* 1772 * Finalize namecache 1773 */ 1774 if (error == 0) { 1775 cache_setunresolved(ap->a_nch); 1776 cache_setvp(ap->a_nch, *ap->a_vpp); 1777 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */ 1778 } 1779 return error; 1780 } 1781 1782 /* 1783 * hammer2_vop_nremove { nch, dvp, cred } 1784 */ 1785 static 1786 int 1787 hammer2_vop_nremove(struct vop_nremove_args *ap) 1788 { 1789 hammer2_inode_t *dip; 1790 hammer2_trans_t trans; 1791 struct namecache *ncp; 1792 const uint8_t *name; 1793 size_t name_len; 1794 int error; 1795 1796 LOCKSTART; 1797 dip = VTOI(ap->a_dvp); 1798 if (dip->pmp->ronly) { 1799 LOCKSTOP; 1800 return(EROFS); 1801 } 1802 1803 ncp = ap->a_nch->ncp; 1804 name = ncp->nc_name; 1805 name_len = ncp->nc_nlen; 1806 1807 
hammer2_pfs_memory_wait(dip->pmp); 1808 hammer2_trans_init(&trans, dip->pmp, 0); 1809 error = hammer2_unlink_file(&trans, dip, name, name_len, 1810 0, NULL, ap->a_nch, -1); 1811 hammer2_run_unlinkq(&trans, dip->pmp); 1812 hammer2_trans_done(&trans); 1813 if (error == 0) 1814 cache_unlink(ap->a_nch); 1815 LOCKSTOP; 1816 return (error); 1817 } 1818 1819 /* 1820 * hammer2_vop_nrmdir { nch, dvp, cred } 1821 */ 1822 static 1823 int 1824 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) 1825 { 1826 hammer2_inode_t *dip; 1827 hammer2_trans_t trans; 1828 struct namecache *ncp; 1829 const uint8_t *name; 1830 size_t name_len; 1831 int error; 1832 1833 LOCKSTART; 1834 dip = VTOI(ap->a_dvp); 1835 if (dip->pmp->ronly) { 1836 LOCKSTOP; 1837 return(EROFS); 1838 } 1839 1840 ncp = ap->a_nch->ncp; 1841 name = ncp->nc_name; 1842 name_len = ncp->nc_nlen; 1843 1844 hammer2_pfs_memory_wait(dip->pmp); 1845 hammer2_trans_init(&trans, dip->pmp, 0); 1846 hammer2_run_unlinkq(&trans, dip->pmp); 1847 error = hammer2_unlink_file(&trans, dip, name, name_len, 1848 1, NULL, ap->a_nch, -1); 1849 hammer2_trans_done(&trans); 1850 if (error == 0) 1851 cache_unlink(ap->a_nch); 1852 LOCKSTOP; 1853 return (error); 1854 } 1855 1856 /* 1857 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1858 */ 1859 static 1860 int 1861 hammer2_vop_nrename(struct vop_nrename_args *ap) 1862 { 1863 struct namecache *fncp; 1864 struct namecache *tncp; 1865 hammer2_inode_t *cdip; 1866 hammer2_inode_t *fdip; 1867 hammer2_inode_t *tdip; 1868 hammer2_inode_t *ip; 1869 hammer2_cluster_t *cluster; 1870 hammer2_cluster_t *fdcluster; 1871 hammer2_cluster_t *tdcluster; 1872 hammer2_cluster_t *cdcluster; 1873 hammer2_trans_t trans; 1874 const uint8_t *fname; 1875 size_t fname_len; 1876 const uint8_t *tname; 1877 size_t tname_len; 1878 int error; 1879 int tnch_error; 1880 int hlink; 1881 1882 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 1883 return(EXDEV); 1884 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount) 1885 
return(EXDEV); 1886 1887 fdip = VTOI(ap->a_fdvp); /* source directory */ 1888 tdip = VTOI(ap->a_tdvp); /* target directory */ 1889 1890 if (fdip->pmp->ronly) 1891 return(EROFS); 1892 1893 LOCKSTART; 1894 fncp = ap->a_fnch->ncp; /* entry name in source */ 1895 fname = fncp->nc_name; 1896 fname_len = fncp->nc_nlen; 1897 1898 tncp = ap->a_tnch->ncp; /* entry name in target */ 1899 tname = tncp->nc_name; 1900 tname_len = tncp->nc_nlen; 1901 1902 hammer2_pfs_memory_wait(tdip->pmp); 1903 hammer2_trans_init(&trans, tdip->pmp, 0); 1904 1905 /* 1906 * ip is the inode being renamed. If this is a hardlink then 1907 * ip represents the actual file and not the hardlink marker. 1908 */ 1909 ip = VTOI(fncp->nc_vp); 1910 cluster = NULL; 1911 1912 1913 /* 1914 * The common parent directory must be locked first to avoid deadlocks. 1915 * Also note that fdip and/or tdip might match cdip. 1916 * 1917 * WARNING! fdip may not match ip->pip. That is, if the source file 1918 * is already a hardlink then what we are renaming is the 1919 * hardlink pointer, not the hardlink itself. The hardlink 1920 * directory (ip->pip) will already be at a common parent 1921 * of fdrip. 1922 * 1923 * Be sure to use ip->pip when finding the common parent 1924 * against tdip or we might accidently move the hardlink 1925 * target into a subdirectory that makes it inaccessible to 1926 * other pointers. 1927 */ 1928 cdip = hammer2_inode_common_parent(ip->pip, tdip); 1929 cdcluster = hammer2_inode_lock_ex(cdip); 1930 fdcluster = hammer2_inode_lock_ex(fdip); 1931 tdcluster = hammer2_inode_lock_ex(tdip); 1932 1933 /* 1934 * Keep a tight grip on the inode so the temporary unlinking from 1935 * the source location prior to linking to the target location 1936 * does not cause the cluster to be destroyed. 1937 * 1938 * NOTE: To avoid deadlocks we cannot lock (ip) while we are 1939 * unlinking elements from their directories. Locking 1940 * the nlinks field does not lock the whole inode. 
1941 */ 1942 hammer2_inode_ref(ip); 1943 1944 /* 1945 * Remove target if it exists. 1946 */ 1947 error = hammer2_unlink_file(&trans, tdip, tname, tname_len, 1948 -1, NULL, ap->a_tnch, -1); 1949 tnch_error = error; 1950 if (error && error != ENOENT) 1951 goto done; 1952 1953 /* 1954 * When renaming a hardlinked file we may have to re-consolidate 1955 * the location of the hardlink target. 1956 * 1957 * If ip represents a regular file the consolidation code essentially 1958 * does nothing other than return the same locked cluster that was 1959 * passed in. 1960 * 1961 * The returned cluster will be locked. 1962 * 1963 * WARNING! We do not currently have a local copy of ipdata but 1964 * we do use one later remember that it must be reloaded 1965 * on any modification to the inode, including connects. 1966 */ 1967 cluster = hammer2_inode_lock_ex(ip); 1968 error = hammer2_hardlink_consolidate(&trans, ip, &cluster, 1969 cdip, cdcluster, 0); 1970 if (error) 1971 goto done; 1972 1973 /* 1974 * Disconnect (fdip, fname) from the source directory. This will 1975 * disconnect (ip) if it represents a direct file. If (ip) represents 1976 * a hardlink the HARDLINK pointer object will be removed but the 1977 * hardlink will stay intact. 1978 * 1979 * Always pass nch as NULL because we intend to reconnect the inode, 1980 * so we don't want hammer2_unlink_file() to rename it to the hidden 1981 * open-but-unlinked directory. 1982 * 1983 * The target cluster may be marked DELETED but will not be destroyed 1984 * since we retain our hold on ip and cluster. 1985 * 1986 * NOTE: We pass nlinks as 0 (not -1) in order to retain the file's 1987 * link count. 1988 */ 1989 error = hammer2_unlink_file(&trans, fdip, fname, fname_len, 1990 -1, &hlink, NULL, 0); 1991 KKASSERT(error != EAGAIN); 1992 if (error) 1993 goto done; 1994 1995 /* 1996 * Reconnect ip to target directory using cluster. 
Chains cannot 1997 * actually be moved, so this will duplicate the cluster in the new 1998 * spot and assign it to the ip, replacing the old cluster. 1999 * 2000 * WARNING: Because recursive locks are allowed and we unlinked the 2001 * file that we have a cluster-in-hand for just above, the 2002 * cluster might have been delete-duplicated. We must 2003 * refactor the cluster. 2004 * 2005 * WARNING: Chain locks can lock buffer cache buffers, to avoid 2006 * deadlocks we want to unlock before issuing a cache_*() 2007 * op (that might have to lock a vnode). 2008 * 2009 * NOTE: Pass nlinks as 0 because we retained the link count from 2010 * the unlink, so we do not have to modify it. 2011 */ 2012 error = hammer2_inode_connect(&trans, &cluster, hlink, 2013 tdip, tdcluster, 2014 tname, tname_len, 0); 2015 if (error == 0) { 2016 KKASSERT(cluster != NULL); 2017 hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), cluster); 2018 } 2019 done: 2020 hammer2_inode_unlock_ex(ip, cluster); 2021 hammer2_inode_unlock_ex(tdip, tdcluster); 2022 hammer2_inode_unlock_ex(fdip, fdcluster); 2023 hammer2_inode_unlock_ex(cdip, cdcluster); 2024 hammer2_inode_drop(ip); 2025 hammer2_inode_drop(cdip); 2026 hammer2_run_unlinkq(&trans, fdip->pmp); 2027 hammer2_trans_done(&trans); 2028 2029 /* 2030 * Issue the namecache update after unlocking all the internal 2031 * hammer structures, otherwise we might deadlock. 2032 */ 2033 if (tnch_error == 0) { 2034 cache_unlink(ap->a_tnch); 2035 cache_setunresolved(ap->a_tnch); 2036 } 2037 if (error == 0) 2038 cache_rename(ap->a_fnch, ap->a_tnch); 2039 2040 LOCKSTOP; 2041 return (error); 2042 } 2043 2044 /* 2045 * Strategy code (async logical file buffer I/O from system) 2046 * 2047 * WARNING: The strategy code cannot safely use hammer2 transactions 2048 * as this can deadlock against vfs_sync's vfsync() call 2049 * if multiple flushes are queued. All H2 structures must 2050 * already be present and ready for the DIO. 
 *
 *	    Reads can be initiated asynchronously, writes have to be
 *	    spooled to a separate thread for action to avoid deadlocks.
 */
static int hammer2_strategy_read(struct vop_strategy_args *ap);
static int hammer2_strategy_write(struct vop_strategy_args *ap);
static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);

/*
 * Dispatch a logical buffer I/O request according to the buffer command.
 *
 * Reads and writes are forwarded to their handlers and counted in the
 * hammer2_iod_file_* statistics.  Any other command fails the buffer
 * with EINVAL and completes the bio immediately.
 */
static
int
hammer2_vop_strategy(struct vop_strategy_args *ap)
{
	struct bio *biop;
	struct buf *bp;
	int error;

	biop = ap->a_bio;
	bp = biop->bio_buf;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		error = hammer2_strategy_read(ap);
		++hammer2_iod_file_read;
		break;
	case BUF_CMD_WRITE:
		error = hammer2_strategy_write(ap);
		++hammer2_iod_file_write;
		break;
	default:
		/* unsupported command: flag the error and finish the bio */
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(biop);
		break;
	}
	return (error);
}

/*
 * Logical buffer I/O, async read.
 *
 * Looks up the cluster backing the logical offset.  When none exists the
 * buffer is zero-filled and completed on the spot; otherwise the load is
 * handed to hammer2_cluster_load_async() with
 * hammer2_strategy_read_callback() as completion handler.  Always
 * returns 0.
 */
static
int
hammer2_strategy_read(struct vop_strategy_args *ap)
{
	struct buf *bp;
	struct bio *bio;
	struct bio *nbio;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lbase;
	int ddflag;
	uint8_t btype;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	nbio = push_bio(bio);

	/* the logical offset must be aligned per HAMMER2_PBUFMASK */
	lbase = bio->bio_offset;
	KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);

	/*
	 * Lookup the file offset.
	 */
	cparent = hammer2_inode_lock_sh(ip);
	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
				       lbase, lbase,
				       HAMMER2_LOOKUP_NODATA |
				       HAMMER2_LOOKUP_SHARED,
				       &ddflag);
	hammer2_inode_unlock_sh(ip, cparent);

	/*
	 * Data is zero-fill if no cluster could be found
	 * (XXX or EIO on a cluster failure).
	 */
	if (cluster == NULL) {
		bp->b_resid = 0;
		bp->b_error = 0;
		bzero(bp->b_data, bp->b_bcount);
		biodone(nbio);
		return(0);
	}

	/*
	 * Cluster elements must be type INODE or type DATA, but the
	 * compression mode (or not) for DATA chains can be different for
	 * each chain.  This will be handled by the callback.
	 *
	 * If the cluster already has valid data the callback will be made
	 * immediately/synchronously.
	 */
	btype = hammer2_cluster_type(cluster);
	if (btype != HAMMER2_BREF_TYPE_INODE &&
	    btype != HAMMER2_BREF_TYPE_DATA) {
		panic("READ PATH: hammer2_strategy_read: unknown bref type");
	}
	hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
				   nbio);
	return(0);
}

/*
 * Read callback for hammer2_cluster_load_async().  The load function may
 * start several actual I/Os but will only make one callback, typically with
 * the first valid I/O XXX
 *
 * iocb->ptr carries the logical bio handed to
 * hammer2_cluster_load_async().  On completion the logical buffer is
 * filled from the inode's embedded data or from the (possibly
 * compressed) data block, and the bio is finished with biodone().
 */
static
void
hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
{
	struct bio *bio = iocb->ptr;	/* original logical buffer */
	struct buf *bp = bio->bio_buf;	/* original logical buffer */
	hammer2_chain_t *chain;
	hammer2_cluster_t *cluster;
	hammer2_io_t *dio;
	char *data;
	int i;

	/*
	 * Extract data and handle iteration on I/O failure.  In the code
	 * below iocb->lbase holds the index of the cluster element being
	 * tried (it is incremented to select the next chain on error).
	 */
	cluster = iocb->cluster;
	dio = iocb->dio;	/* can be NULL if iocb not in progress */

	/*
	 * Work to do if INPROG set, else dio is already good or dio is
	 * NULL (which is the shortcut case if chain->data is already good).
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		/*
		 * Read attempt not yet made.  Issue an asynchronous read
		 * if necessary and return, operation will chain back to
		 * this function.
		 */
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (dio->bp == NULL ||
			    (dio->bp->b_flags & B_CACHE) == 0) {
				/* discard a buffer that lacks B_CACHE */
				if (dio->bp) {
					bqrelse(dio->bp);
					dio->bp = NULL;
				}
				iocb->flags |= HAMMER2_IOCB_READ;
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			}
		}
	}

	/*
	 * If we have a DIO it is now done, check for an error and
	 * calculate the data.
	 *
	 * If there is no DIO it is an optimization by
	 * hammer2_cluster_load_async(), the data is available in
	 * chain->data.
	 */
	if (dio) {
		if (dio->bp->b_flags & B_ERROR) {
			i = (int)iocb->lbase + 1;
			if (i >= cluster->nchains) {
				/*
				 * No further chains to try: propagate the
				 * device error to the logical buffer.
				 */
				bp->b_flags |= B_ERROR;
				bp->b_error = dio->bp->b_error;
				hammer2_io_complete(iocb);
				biodone(bio);
				hammer2_cluster_unlock(cluster);
			} else {
				/*
				 * Retry with the next chain in the cluster,
				 * reusing the same iocb.
				 */
				hammer2_io_complete(iocb); /* XXX */
				chain = cluster->array[i];
				kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
				hammer2_adjreadcounter(&chain->bref,
						       chain->bytes);
				iocb->chain = chain;
				iocb->lbase = (off_t)i;
				iocb->flags = 0;
				iocb->error = 0;
				hammer2_io_getblk(chain->hmp,
						  chain->bref.data_off,
						  chain->bytes,
						  iocb);
			}
			return;
		}
		chain = iocb->chain;
		data = hammer2_io_data(dio, chain->bref.data_off);
	} else {
		/*
		 * Special synchronous case, data present in chain->data.
		 */
		chain = iocb->chain;
		data = (void *)chain->data;
	}

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 * The remainder of the logical buffer is zeroed.
		 */
		bcopy(((hammer2_inode_data_t *)data)->u.data,
		      bp->b_data, HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
		bp->b_resid = 0;
		bp->b_error = 0;
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
		case HAMMER2_COMP_LZ4:
			hammer2_decompress_LZ4_callback(data, chain->bytes,
							bio);
			break;
		case HAMMER2_COMP_ZLIB:
			hammer2_decompress_ZLIB_callback(data, chain->bytes,
							 bio);
			break;
		case HAMMER2_COMP_NONE:
			/* plain copy, zero-padding any shortfall */
			KKASSERT(chain->bytes <= bp->b_bcount);
			bcopy(data, bp->b_data, chain->bytes);
			if (chain->bytes < bp->b_bcount) {
				bzero(bp->b_data + chain->bytes,
				      bp->b_bcount - chain->bytes);
			}
			bp->b_flags |= B_NOTMETA;
			bp->b_resid = 0;
			bp->b_error = 0;
			break;
		default:
			panic("hammer2_strategy_read: "
			      "unknown compression type");
		}
	} else {
		/* bqrelse the dio to help stabilize the call to panic() */
		if (dio)
			hammer2_io_bqrelse(&dio);
		panic("hammer2_strategy_read: unknown bref type");
	}

	/*
	 * Once the iocb is cleaned up the DIO (if any) will no longer be
	 * in-progress but will still have a ref.  Be sure to release
	 * the ref.
	 */
	hammer2_io_complete(iocb);		/* physical management */
	if (dio)				/* physical dio & buffer */
		hammer2_io_bqrelse(&dio);
	hammer2_cluster_unlock(cluster);	/* cluster management */
	biodone(bio);				/* logical buffer */
}

/*
 * Logical buffer I/O, write.
 *
 * The bio is appended to the per-mount write-thread queue under
 * wthread_mtx.  A wakeup on the queue is issued only when the queue was
 * empty beforehand.  Always returns 0.
 */
static
int
hammer2_strategy_write(struct vop_strategy_args *ap)
{
	hammer2_pfsmount_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_inode_t *ip;

	bio = ap->a_bio;
	bp = bio->bio_buf;	/* NOTE(review): assigned but not used below */
	ip = VTOI(ap->a_vp);
	pmp = ip->pmp;

	hammer2_lwinprog_ref(pmp);
	mtx_lock(&pmp->wthread_mtx);
	if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
		/* queue was empty: enqueue and wake the consumer */
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		mtx_unlock(&pmp->wthread_mtx);
		wakeup(&pmp->wthread_bioq);
	} else {
		/* queue already had entries: enqueue without a wakeup */
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		mtx_unlock(&pmp->wthread_mtx);
	}
	hammer2_lwinprog_wait(pmp);

	return(0);
}

/*
 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
 *
 * Thin wrapper: forwards the request for the vnode's inode to
 * hammer2_ioctl() and returns its result.
 */
static
int
hammer2_vop_ioctl(struct vop_ioctl_args *ap)
{
	hammer2_inode_t *ip;
	int error;

	LOCKSTART;
	ip = VTOI(ap->a_vp);

	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
			      ap->a_fflag, ap->a_cred);
	LOCKSTOP;
	return (error);
}

/*
 * Mount control.
 *
 * MOUNTCTL_SET_EXPORT is handled locally: the control buffer must be
 * exactly sizeof(struct export_args) or EINVAL is returned, otherwise it
 * is passed to vfs_export().  All other operations go to
 * vop_stdmountctl().
 */
static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	hammer2_pfsmount_t *pmp;
	int rc;

	LOCKSTART;
	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		mp = ap->a_head.a_ops->head.vv_mount;
		pmp = MPTOPMP(mp);

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = (EINVAL);
		else
			rc = vfs_export(mp, &pmp->export,
					(const struct export_args *)ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}
	LOCKSTOP;
	return (rc);
}

/*
 * This handles unlinked open files after the
vnode is finally dereferenced. 2386 * To avoid deadlocks it cannot be called from the normal vnode recycling 2387 * path, so we call it (1) after a unlink, rmdir, or rename, (2) on every 2388 * flush, and (3) on umount. 2389 */ 2390 void 2391 hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp) 2392 { 2393 const hammer2_inode_data_t *ripdata; 2394 hammer2_inode_unlink_t *ipul; 2395 hammer2_inode_t *ip; 2396 hammer2_cluster_t *cluster; 2397 hammer2_cluster_t *cparent; 2398 2399 if (TAILQ_EMPTY(&pmp->unlinkq)) 2400 return; 2401 2402 LOCKSTART; 2403 spin_lock(&pmp->list_spin); 2404 while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) { 2405 TAILQ_REMOVE(&pmp->unlinkq, ipul, entry); 2406 spin_unlock(&pmp->list_spin); 2407 ip = ipul->ip; 2408 kfree(ipul, pmp->minode); 2409 2410 cluster = hammer2_inode_lock_ex(ip); 2411 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 2412 if (hammer2_debug & 0x400) { 2413 kprintf("hammer2: unlink on reclaim: %s refs=%d\n", 2414 ripdata->filename, ip->refs); 2415 } 2416 KKASSERT(ripdata->nlinks == 0); 2417 2418 cparent = hammer2_cluster_parent(cluster); 2419 hammer2_cluster_delete(trans, cparent, cluster, 2420 HAMMER2_DELETE_PERMANENT); 2421 hammer2_cluster_unlock(cparent); 2422 hammer2_inode_unlock_ex(ip, cluster); /* inode lock */ 2423 hammer2_inode_drop(ip); /* ipul ref */ 2424 2425 spin_lock(&pmp->list_spin); 2426 } 2427 spin_unlock(&pmp->list_spin); 2428 LOCKSTOP; 2429 } 2430 2431 2432 /* 2433 * KQFILTER 2434 */ 2435 static void filt_hammer2detach(struct knote *kn); 2436 static int filt_hammer2read(struct knote *kn, long hint); 2437 static int filt_hammer2write(struct knote *kn, long hint); 2438 static int filt_hammer2vnode(struct knote *kn, long hint); 2439 2440 static struct filterops hammer2read_filtops = 2441 { FILTEROP_ISFD | FILTEROP_MPSAFE, 2442 NULL, filt_hammer2detach, filt_hammer2read }; 2443 static struct filterops hammer2write_filtops = 2444 { FILTEROP_ISFD | FILTEROP_MPSAFE, 2445 NULL, 
filt_hammer2detach, filt_hammer2write }; 2446 static struct filterops hammer2vnode_filtops = 2447 { FILTEROP_ISFD | FILTEROP_MPSAFE, 2448 NULL, filt_hammer2detach, filt_hammer2vnode }; 2449 2450 static 2451 int 2452 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap) 2453 { 2454 struct vnode *vp = ap->a_vp; 2455 struct knote *kn = ap->a_kn; 2456 2457 switch (kn->kn_filter) { 2458 case EVFILT_READ: 2459 kn->kn_fop = &hammer2read_filtops; 2460 break; 2461 case EVFILT_WRITE: 2462 kn->kn_fop = &hammer2write_filtops; 2463 break; 2464 case EVFILT_VNODE: 2465 kn->kn_fop = &hammer2vnode_filtops; 2466 break; 2467 default: 2468 return (EOPNOTSUPP); 2469 } 2470 2471 kn->kn_hook = (caddr_t)vp; 2472 2473 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 2474 2475 return(0); 2476 } 2477 2478 static void 2479 filt_hammer2detach(struct knote *kn) 2480 { 2481 struct vnode *vp = (void *)kn->kn_hook; 2482 2483 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 2484 } 2485 2486 static int 2487 filt_hammer2read(struct knote *kn, long hint) 2488 { 2489 struct vnode *vp = (void *)kn->kn_hook; 2490 hammer2_inode_t *ip = VTOI(vp); 2491 off_t off; 2492 2493 if (hint == NOTE_REVOKE) { 2494 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2495 return(1); 2496 } 2497 off = ip->size - kn->kn_fp->f_offset; 2498 kn->kn_data = (off < INTPTR_MAX) ? 
off : INTPTR_MAX; 2499 if (kn->kn_sfflags & NOTE_OLDAPI) 2500 return(1); 2501 return (kn->kn_data != 0); 2502 } 2503 2504 2505 static int 2506 filt_hammer2write(struct knote *kn, long hint) 2507 { 2508 if (hint == NOTE_REVOKE) 2509 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2510 kn->kn_data = 0; 2511 return (1); 2512 } 2513 2514 static int 2515 filt_hammer2vnode(struct knote *kn, long hint) 2516 { 2517 if (kn->kn_sfflags & hint) 2518 kn->kn_fflags |= hint; 2519 if (hint == NOTE_REVOKE) { 2520 kn->kn_flags |= (EV_EOF | EV_NODATA); 2521 return (1); 2522 } 2523 return (kn->kn_fflags != 0); 2524 } 2525 2526 /* 2527 * FIFO VOPS 2528 */ 2529 static 2530 int 2531 hammer2_vop_markatime(struct vop_markatime_args *ap) 2532 { 2533 hammer2_inode_t *ip; 2534 struct vnode *vp; 2535 2536 vp = ap->a_vp; 2537 ip = VTOI(vp); 2538 2539 if (ip->pmp->ronly) 2540 return(EROFS); 2541 return(0); 2542 } 2543 2544 static 2545 int 2546 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap) 2547 { 2548 int error; 2549 2550 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 2551 if (error) 2552 error = hammer2_vop_kqfilter(ap); 2553 return(error); 2554 } 2555 2556 /* 2557 * VOPS vector 2558 */ 2559 struct vop_ops hammer2_vnode_vops = { 2560 .vop_default = vop_defaultop, 2561 .vop_fsync = hammer2_vop_fsync, 2562 .vop_getpages = vop_stdgetpages, 2563 .vop_putpages = vop_stdputpages, 2564 .vop_access = hammer2_vop_access, 2565 .vop_advlock = hammer2_vop_advlock, 2566 .vop_close = hammer2_vop_close, 2567 .vop_nlink = hammer2_vop_nlink, 2568 .vop_ncreate = hammer2_vop_ncreate, 2569 .vop_nsymlink = hammer2_vop_nsymlink, 2570 .vop_nremove = hammer2_vop_nremove, 2571 .vop_nrmdir = hammer2_vop_nrmdir, 2572 .vop_nrename = hammer2_vop_nrename, 2573 .vop_getattr = hammer2_vop_getattr, 2574 .vop_setattr = hammer2_vop_setattr, 2575 .vop_readdir = hammer2_vop_readdir, 2576 .vop_readlink = hammer2_vop_readlink, 2577 .vop_getpages = vop_stdgetpages, 2578 .vop_putpages = vop_stdputpages, 2579 .vop_read = 
hammer2_vop_read, 2580 .vop_write = hammer2_vop_write, 2581 .vop_open = hammer2_vop_open, 2582 .vop_inactive = hammer2_vop_inactive, 2583 .vop_reclaim = hammer2_vop_reclaim, 2584 .vop_nresolve = hammer2_vop_nresolve, 2585 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, 2586 .vop_nmkdir = hammer2_vop_nmkdir, 2587 .vop_nmknod = hammer2_vop_nmknod, 2588 .vop_ioctl = hammer2_vop_ioctl, 2589 .vop_mountctl = hammer2_vop_mountctl, 2590 .vop_bmap = hammer2_vop_bmap, 2591 .vop_strategy = hammer2_vop_strategy, 2592 .vop_kqfilter = hammer2_vop_kqfilter 2593 }; 2594 2595 struct vop_ops hammer2_spec_vops = { 2596 .vop_default = vop_defaultop, 2597 .vop_fsync = hammer2_vop_fsync, 2598 .vop_read = vop_stdnoread, 2599 .vop_write = vop_stdnowrite, 2600 .vop_access = hammer2_vop_access, 2601 .vop_close = hammer2_vop_close, 2602 .vop_markatime = hammer2_vop_markatime, 2603 .vop_getattr = hammer2_vop_getattr, 2604 .vop_inactive = hammer2_vop_inactive, 2605 .vop_reclaim = hammer2_vop_reclaim, 2606 .vop_setattr = hammer2_vop_setattr 2607 }; 2608 2609 struct vop_ops hammer2_fifo_vops = { 2610 .vop_default = fifo_vnoperate, 2611 .vop_fsync = hammer2_vop_fsync, 2612 #if 0 2613 .vop_read = hammer2_vop_fiforead, 2614 .vop_write = hammer2_vop_fifowrite, 2615 #endif 2616 .vop_access = hammer2_vop_access, 2617 #if 0 2618 .vop_close = hammer2_vop_fifoclose, 2619 #endif 2620 .vop_markatime = hammer2_vop_markatime, 2621 .vop_getattr = hammer2_vop_getattr, 2622 .vop_inactive = hammer2_vop_inactive, 2623 .vop_reclaim = hammer2_vop_reclaim, 2624 .vop_setattr = hammer2_vop_setattr, 2625 .vop_kqfilter = hammer2_vop_fifokqfilter 2626 }; 2627 2628