1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * Kernel Filesystem interface 38 * 39 * NOTE! 
local ipdata pointers must be reloaded on any modifying operation
 * to the inode as its underlying chain may have changed.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>
#include <sys/objcache.h>
#include <sys/event.h>
#include <sys/file.h>
#include <vfs/fifofs/fifo.h>

#include "hammer2.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"

#define ZFOFFSET	(-2LL)

static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
				int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
				int ioflag, int seqcount);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);

/* Shared scratch-buffer caches for the de/compression read/write paths */
struct objcache *cache_buffer_read;
struct objcache *cache_buffer_write;

/*
 * Callback used in read path in case that a block is compressed with LZ4.
 *
 * On-media layout: the block begins with a native int holding the
 * compressed payload size, followed by the LZ4 payload itself.  The
 * decompressed result is copied into bio->bio_buf.
 */
static
void
hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
{
	struct buf *bp;
	char *compressed_buffer;
	int compressed_size;
	int result;

	bp = bio->bio_buf;

#if 0
	if bio->bio_caller_info2.index &&
	      bio->bio_caller_info1.uvalue32 !=
	      crc32(bp->b_data, bp->b_bufsize) --- return error
#endif

	KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
	/* first int of the media block is the compressed payload size */
	compressed_size = *(const int *)data;
	KKASSERT(compressed_size <= bytes - sizeof(int));

	/* decompress into a scratch buffer, then copy into the logical buf */
	compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
	result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
				     compressed_buffer,
				     compressed_size,
				     bp->b_bufsize);
	if (result < 0) {
		kprintf("READ PATH: Error during decompression."
108 "bio %016jx/%d\n", 109 (intmax_t)bio->bio_offset, bytes); 110 /* make sure it isn't random garbage */ 111 bzero(compressed_buffer, bp->b_bufsize); 112 } 113 KKASSERT(result <= bp->b_bufsize); 114 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); 115 if (result < bp->b_bufsize) 116 bzero(bp->b_data + result, bp->b_bufsize - result); 117 objcache_put(cache_buffer_read, compressed_buffer); 118 bp->b_resid = 0; 119 bp->b_flags |= B_AGE; 120 } 121 122 /* 123 * Callback used in read path in case that a block is compressed with ZLIB. 124 * It is almost identical to LZ4 callback, so in theory they can be unified, 125 * but we didn't want to make changes in bio structure for that. 126 */ 127 static 128 void 129 hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio) 130 { 131 struct buf *bp; 132 char *compressed_buffer; 133 z_stream strm_decompress; 134 int result; 135 int ret; 136 137 bp = bio->bio_buf; 138 139 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE); 140 strm_decompress.avail_in = 0; 141 strm_decompress.next_in = Z_NULL; 142 143 ret = inflateInit(&strm_decompress); 144 145 if (ret != Z_OK) 146 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n"); 147 148 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); 149 strm_decompress.next_in = __DECONST(char *, data); 150 151 /* XXX supply proper size, subset of device bp */ 152 strm_decompress.avail_in = bytes; 153 strm_decompress.next_out = compressed_buffer; 154 strm_decompress.avail_out = bp->b_bufsize; 155 156 ret = inflate(&strm_decompress, Z_FINISH); 157 if (ret != Z_STREAM_END) { 158 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n"); 159 bzero(compressed_buffer, bp->b_bufsize); 160 } 161 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); 162 result = bp->b_bufsize - strm_decompress.avail_out; 163 if (result < bp->b_bufsize) 164 bzero(bp->b_data + result, strm_decompress.avail_out); 165 objcache_put(cache_buffer_read, compressed_buffer); 166 ret = 
inflateEnd(&strm_decompress); 167 168 bp->b_resid = 0; 169 bp->b_flags |= B_AGE; 170 } 171 172 static __inline 173 void 174 hammer2_knote(struct vnode *vp, int flags) 175 { 176 if (flags) 177 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 178 } 179 180 /* 181 * Last reference to a vnode is going away but it is still cached. 182 */ 183 static 184 int 185 hammer2_vop_inactive(struct vop_inactive_args *ap) 186 { 187 hammer2_inode_t *ip; 188 hammer2_cluster_t *cluster; 189 struct vnode *vp; 190 191 LOCKSTART; 192 vp = ap->a_vp; 193 ip = VTOI(vp); 194 195 /* 196 * Degenerate case 197 */ 198 if (ip == NULL) { 199 vrecycle(vp); 200 LOCKSTOP; 201 return (0); 202 } 203 204 /* 205 * Detect updates to the embedded data which may be synchronized by 206 * the strategy code. Simply mark the inode modified so it gets 207 * picked up by our normal flush. 208 */ 209 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_NEVER | 210 HAMMER2_RESOLVE_RDONLY); 211 KKASSERT(cluster); 212 213 /* 214 * Check for deleted inodes and recycle immediately. 215 * 216 * WARNING: nvtruncbuf() can only be safely called without the inode 217 * lock held due to the way our write thread works. 218 */ 219 if (hammer2_cluster_isunlinked(cluster)) { 220 hammer2_key_t lbase; 221 int nblksize; 222 223 nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL); 224 hammer2_inode_unlock(ip, cluster); 225 nvtruncbuf(vp, 0, nblksize, 0, 0); 226 vrecycle(vp); 227 } else { 228 hammer2_inode_unlock(ip, cluster); 229 } 230 LOCKSTOP; 231 return (0); 232 } 233 234 /* 235 * Reclaim a vnode so that it can be reused; after the inode is 236 * disassociated, the filesystem must manage it alone. 
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
	hammer2_cluster_t *cluster;
	hammer2_inode_t *ip;
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	LOCKSTART;
	vp = ap->a_vp;
	ip = VTOI(vp);
	if (ip == NULL) {
		LOCKSTOP;
		return(0);
	}

	/*
	 * Inode must be locked for reclaim.
	 */
	pmp = ip->pmp;
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_NEVER |
					 HAMMER2_RESOLVE_RDONLY);

	/*
	 * The final close of a deleted file or directory marks it for
	 * destruction.  The DELETED flag allows the flusher to shortcut
	 * any modified blocks still unflushed (that is, just ignore them).
	 *
	 * HAMMER2 usually does not try to optimize the freemap by returning
	 * deleted blocks to it as it does not usually know how many snapshots
	 * might be referencing portions of the file/dir.
	 */
	/* sever the vnode <-> inode association both ways */
	vp->v_data = NULL;
	ip->vp = NULL;

	/*
	 * NOTE! We do not attempt to flush chains here, flushing is
	 *	 really fragile and could also deadlock.
	 */
	vclrisdirty(vp);

	/*
	 * A reclaim can occur at any time so we cannot safely start a
	 * transaction to handle reclamation of unlinked files.  Instead,
	 * the ip is left with a reference and placed on a linked list and
	 * handled later on.
	 */
	if (hammer2_cluster_isunlinked(cluster)) {
		hammer2_inode_unlink_t *ipul;

		/* queue for deferred destruction; ipul inherits the vp ref */
		ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
		ipul->ip = ip;

		hammer2_spin_ex(&pmp->list_spin);
		TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry);
		hammer2_spin_unex(&pmp->list_spin);
		hammer2_inode_unlock(ip, cluster);	/* unlock */
		/* retain ref from vp for ipul */
	} else {
		hammer2_inode_unlock(ip, cluster);	/* unlock */
		hammer2_inode_drop(ip);			/* vp ref */
	}
	/* cluster no longer referenced */
	/* cluster = NULL; not needed */

	/*
	 * XXX handle background sync when ip dirty, kernel will no longer
	 * notify us regarding this inode because there is no longer a
	 * vnode attached to it.
	 */

	LOCKSTOP;
	return (0);
}

/*
 * Flush the inode's buffers and meta-data to media within a normal
 * (non-ISFLUSH) transaction; see comments below on why a full chain
 * flush is intentionally avoided here.
 */
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_trans_t trans;
	hammer2_cluster_t *cluster;
	struct vnode *vp;

	LOCKSTART;
	vp = ap->a_vp;
	ip = VTOI(vp);

#if 0
	/* XXX can't do this yet */
	hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
#endif
	hammer2_trans_init(&trans, ip->pmp, 0);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

	/*
	 * Calling chain_flush here creates a lot of duplicative
	 * COW operations due to non-optimal vnode ordering.
	 *
	 * Only do it for an actual fsync() syscall.  The other forms
	 * which call this function will eventually call chain_flush
	 * on the volume root as a catch-all, which is far more optimal.
 */
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	vclrisdirty(vp);
	/* size/mtime changes still need to be synced to the inode meta-data */
	if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME))
		hammer2_inode_fsync(&trans, ip, cluster);

#if 0
	/*
	 * XXX creates discontinuity w/modify_tid
	 */
	if (ap->a_flags & VOP_FSYNC_SYSCALL) {
		hammer2_flush(&trans, cluster);
	}
#endif
	hammer2_inode_unlock(ip, cluster);
	hammer2_trans_done(&trans);

	LOCKSTOP;
	return (0);
}

/*
 * Check access permissions against the uid/gid/mode/flags stored in the
 * inode meta-data.  Uses a shared inode lock.
 */
static
int
hammer2_vop_access(struct vop_access_args *ap)
{
	hammer2_inode_t *ip = VTOI(ap->a_vp);
	const hammer2_inode_data_t *ripdata;
	hammer2_cluster_t *cluster;
	uid_t uid;
	gid_t gid;
	int error;

	LOCKSTART;
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
					 HAMMER2_RESOLVE_SHARED);
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	uid = hammer2_to_unix_xid(&ripdata->uid);
	gid = hammer2_to_unix_xid(&ripdata->gid);
	error = vop_helper_access(ap, uid, gid, ripdata->mode, ripdata->uflags);
	hammer2_inode_unlock(ip, cluster);

	LOCKSTOP;
	return (error);
}

/*
 * Fill in *ap->a_vap from the inode meta-data.  Uses a shared inode lock.
 */
static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_cluster_t *cluster;
	hammer2_pfs_t *pmp;
	hammer2_inode_t *ip;
	struct vnode *vp;
	struct vattr *vap;

	LOCKSTART;
	vp = ap->a_vp;
	vap = ap->a_vap;

	ip = VTOI(vp);
	pmp = ip->pmp;

	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
					 HAMMER2_RESOLVE_SHARED);
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);

	vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
	vap->va_fileid = ripdata->inum;
	vap->va_mode = ripdata->mode;
	vap->va_nlink = ripdata->nlinks;
	vap->va_uid = hammer2_to_unix_xid(&ripdata->uid);
	vap->va_gid = hammer2_to_unix_xid(&ripdata->gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->size;	/* protected by shared lock */
	vap->va_blocksize = HAMMER2_PBUFSIZE;
	vap->va_flags = ripdata->uflags;
	hammer2_time_to_timespec(ripdata->ctime, &vap->va_ctime);
	hammer2_time_to_timespec(ripdata->mtime, &vap->va_mtime);
	/* atime is reported from mtime here */
	hammer2_time_to_timespec(ripdata->mtime, &vap->va_atime);
	vap->va_gen = 1;
	vap->va_bytes = vap->va_size;	/* XXX */
	vap->va_type = hammer2_get_vtype(ripdata);
	vap->va_filerev = 0;
	vap->va_uid_uuid = ripdata->uid;
	vap->va_gid_uuid = ripdata->gid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	hammer2_inode_unlock(ip, cluster);

	LOCKSTOP;
	return (0);
}

/*
 * Apply attribute changes (flags, ownership, size, times, mode) from
 * *ap->a_vap to the inode.  Runs inside a transaction; any modification
 * obtains a fresh writable ipdata via hammer2_cluster_modify_ip() and
 * reloads the local ripdata pointer afterwards (see file NOTE at top).
 */
static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_trans_t trans;
	struct vnode *vp;
	struct vattr *vap;
	int error;
	int kflags = 0;
	int domtime = 0;	/* flag an extra mtime meta-data update */
	int dosync = 0;		/* cluster meta-data was modified */
	uint64_t ctime;

	LOCKSTART;
	vp = ap->a_vp;
	vap = ap->a_vap;
	hammer2_update_time(&ctime);

	ip = VTOI(vp);

	if (ip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, 0);
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	error = 0;

	if (vap->va_flags != VNOVAL) {
		u_int32_t flags;

		flags = ripdata->uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					hammer2_to_unix_xid(&ripdata->uid),
					ap->a_cred);
		if (error == 0) {
			if (ripdata->uflags != flags) {
				wipdata = hammer2_cluster_modify_ip(&trans, ip,
								    cluster, 0);
				wipdata->uflags = flags;
				wipdata->ctime = ctime;
				kflags |=
NOTE_ATTRIB;
				dosync = 1;
				ripdata = wipdata;	/* RELOAD */
			}
			if (ripdata->uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	/* any other attribute change is refused on immutable/append files */
	if (ripdata->uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ripdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer2_guid_to_uuid(&uuid_uid, cur_uid);
			hammer2_guid_to_uuid(&uuid_gid, cur_gid);
			/* only dirty the inode if something actually changed */
			if (bcmp(&uuid_uid, &ripdata->uid, sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ripdata->gid, sizeof(uuid_gid)) ||
			    ripdata->mode != cur_mode
			) {
				wipdata = hammer2_cluster_modify_ip(&trans, ip,
								    cluster, 0);
				wipdata->uid = uuid_uid;
				wipdata->gid = uuid_gid;
				wipdata->mode = cur_mode;
				wipdata->ctime = ctime;
				dosync = 1;
				ripdata = wipdata;	/* RELOAD */
			}
			kflags |= NOTE_ATTRIB;
		}
	}

	/*
	 * Resize the file
	 */
	if (vap->va_size != VNOVAL && ip->size != vap->va_size) {
		switch(vp->v_type) {
		case VREG:
			if (vap->va_size == ip->size)
				break;
			/* truncate/extend must run without the inode lock */
			hammer2_inode_unlock(ip, cluster);
			if (vap->va_size < ip->size) {
				hammer2_truncate_file(ip, vap->va_size);
			} else {
				hammer2_extend_file(ip, vap->va_size);
			}
			cluster = hammer2_inode_lock(ip,
						     HAMMER2_RESOLVE_ALWAYS);
			/* RELOAD */
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			domtime = 1;
			break;
		default:
			error = EINVAL;
			goto done;
		}
	}
#if 0
	/* atime not supported */
	if (vap->va_atime.tv_sec != VNOVAL) {
		wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
		wipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
		kflags |= NOTE_ATTRIB;
		dosync = 1;
		ripdata = wipdata;
	}
#endif
	if (vap->va_mtime.tv_sec != VNOVAL) {
		wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
		wipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
		kflags |= NOTE_ATTRIB;
		/* explicit mtime supersedes the resize-implied update */
		domtime = 0;
		dosync = 1;
		ripdata = wipdata;	/* RELOAD */
	}
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ripdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0 && ripdata->mode != cur_mode) {
			wipdata = hammer2_cluster_modify_ip(&trans, ip,
							    cluster, 0);
			wipdata->mode = cur_mode;
			wipdata->ctime = ctime;
			kflags |= NOTE_ATTRIB;
			dosync = 1;
			ripdata = wipdata;	/* RELOAD */
		}
	}

	/*
	 * If a truncation occurred we must call inode_fsync() now in order
	 * to trim the related data chains, otherwise a later expansion can
	 * cause havoc.
	 */
	if (dosync) {
		hammer2_cluster_modsync(cluster);
		dosync = 0;
	}
	hammer2_inode_fsync(&trans, ip, cluster);

	/*
	 * Cleanup.  If domtime is set an additional inode modification
	 * must be flagged.  All other modifications will have already
	 * set INODE_MODIFIED and called vsetisdirty().
 */
done:
	if (domtime) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
					   HAMMER2_INODE_MTIME);
		vsetisdirty(ip->vp);
	}
	if (dosync)
		hammer2_cluster_modsync(cluster);
	hammer2_inode_unlock(ip, cluster);
	hammer2_trans_done(&trans);
	hammer2_knote(ip->vp, kflags);

	LOCKSTOP;
	return (error);
}

/*
 * Read directory entries into the uio, emitting synthesized '.' and '..'
 * entries first and then iterating the directory's inode chains by hash
 * key.  Supplies seek cookies when the caller requests them.
 */
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_t *ip;
	hammer2_inode_t *xip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *xcluster;
	hammer2_blockref_t bref;
	hammer2_tid_t inum;
	hammer2_key_t key_next;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int ncookies;
	int error;
	int dtype;
	int r;

	LOCKSTART;
	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	/*
	 * Setup cookies directory entry cookies if requested
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
					 HAMMER2_RESOLVE_SHARED);

	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	error = 0;
	cluster = (void *)(intptr_t)-1;	/* non-NULL for early goto done case */

	if (saveoff == 0) {
		inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		/*
		 * Be careful with lockorder when accessing ".."
		 *
		 * (ip is the current dir. xip is the parent dir).
		 */
		inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
		while (ip->pip != NULL && ip != ip->pmp->iroot) {
			xip = ip->pip;
			hammer2_inode_ref(xip);
			/* drop our lock, lock parent, then relock child */
			hammer2_inode_unlock(ip, cparent);
			xcluster = hammer2_inode_lock(xip,
						      HAMMER2_RESOLVE_ALWAYS |
						      HAMMER2_RESOLVE_SHARED);

			cparent = hammer2_inode_lock(ip,
						      HAMMER2_RESOLVE_ALWAYS |
						      HAMMER2_RESOLVE_SHARED);
			hammer2_inode_drop(xip);
			ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
			/* retry if the parent changed while unlocked */
			if (xip == ip->pip) {
				inum = hammer2_cluster_rdata(xcluster)->
					ipdata.inum & HAMMER2_DIRHASH_USERMSK;
				hammer2_inode_unlock(xip, xcluster);
				break;
			}
			hammer2_inode_unlock(xip, xcluster);
		}
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: lkey %016jx\n", lkey);

	/*
	 * parent is the inode cluster, already locked for us.  Don't
	 * double lock shared locks as this will screw up upgrades.
	 */
	if (error) {
		goto done;
	}
	cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey,
					 HAMMER2_LOOKUP_SHARED);
	if (cluster == NULL) {
		cluster = hammer2_cluster_lookup(cparent, &key_next,
						 lkey, (hammer2_key_t)-1,
						 HAMMER2_LOOKUP_SHARED);
	}
	if (cluster)
		hammer2_cluster_bref(cluster, &bref);
	while (cluster) {
		if (hammer2_debug & 0x0020)
			kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n",
				cparent->focus, cluster->focus,
				bref.key, key_next);

		if (bref.type == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			dtype = hammer2_get_dtype(ripdata);
			saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
			r = vop_write_dirent(&error, uio,
					     ripdata->inum &
					      HAMMER2_DIRHASH_USERMSK,
					     dtype,
					     ripdata->name_len,
					     ripdata->filename);
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else {
			/* XXX chain error */
			kprintf("bad chain type readdir %d\n", bref.type);
		}

		/*
		 * Keys may not be returned in order so once we have a
		 * placemarker (cluster) the scan must allow the full range
		 * or some entries will be missed.
787 */ 788 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 789 key_next, (hammer2_key_t)-1, 790 HAMMER2_LOOKUP_SHARED); 791 if (cluster) { 792 hammer2_cluster_bref(cluster, &bref); 793 saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1; 794 } else { 795 saveoff = (hammer2_key_t)-1; 796 } 797 if (cookie_index == ncookies) 798 break; 799 } 800 if (cluster) { 801 hammer2_cluster_unlock(cluster); 802 hammer2_cluster_drop(cluster); 803 } 804 done: 805 hammer2_inode_unlock(ip, cparent); 806 if (ap->a_eofflag) 807 *ap->a_eofflag = (cluster == NULL); 808 if (hammer2_debug & 0x0020) 809 kprintf("readdir: done at %016jx\n", saveoff); 810 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 811 if (error && cookie_index == 0) { 812 if (cookies) { 813 kfree(cookies, M_TEMP); 814 *ap->a_ncookies = 0; 815 *ap->a_cookies = NULL; 816 } 817 } else { 818 if (cookies) { 819 *ap->a_ncookies = cookie_index; 820 *ap->a_cookies = cookies; 821 } 822 } 823 LOCKSTOP; 824 return (error); 825 } 826 827 /* 828 * hammer2_vop_readlink { vp, uio, cred } 829 */ 830 static 831 int 832 hammer2_vop_readlink(struct vop_readlink_args *ap) 833 { 834 struct vnode *vp; 835 hammer2_inode_t *ip; 836 int error; 837 838 vp = ap->a_vp; 839 if (vp->v_type != VLNK) 840 return (EINVAL); 841 ip = VTOI(vp); 842 843 error = hammer2_read_file(ip, ap->a_uio, 0); 844 return (error); 845 } 846 847 static 848 int 849 hammer2_vop_read(struct vop_read_args *ap) 850 { 851 struct vnode *vp; 852 hammer2_inode_t *ip; 853 struct uio *uio; 854 int error; 855 int seqcount; 856 int bigread; 857 858 /* 859 * Read operations supported on this vnode? 
860 */ 861 vp = ap->a_vp; 862 if (vp->v_type != VREG) 863 return (EINVAL); 864 865 /* 866 * Misc 867 */ 868 ip = VTOI(vp); 869 uio = ap->a_uio; 870 error = 0; 871 872 seqcount = ap->a_ioflag >> 16; 873 bigread = (uio->uio_resid > 100 * 1024 * 1024); 874 875 error = hammer2_read_file(ip, uio, seqcount); 876 return (error); 877 } 878 879 static 880 int 881 hammer2_vop_write(struct vop_write_args *ap) 882 { 883 hammer2_inode_t *ip; 884 hammer2_trans_t trans; 885 thread_t td; 886 struct vnode *vp; 887 struct uio *uio; 888 int error; 889 int seqcount; 890 int bigwrite; 891 892 /* 893 * Read operations supported on this vnode? 894 */ 895 vp = ap->a_vp; 896 if (vp->v_type != VREG) 897 return (EINVAL); 898 899 /* 900 * Misc 901 */ 902 ip = VTOI(vp); 903 uio = ap->a_uio; 904 error = 0; 905 if (ip->pmp->ronly) { 906 return (EROFS); 907 } 908 909 seqcount = ap->a_ioflag >> 16; 910 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 911 912 /* 913 * Check resource limit 914 */ 915 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 916 uio->uio_offset + uio->uio_resid > 917 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 918 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 919 return (EFBIG); 920 } 921 922 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 923 924 /* 925 * The transaction interlocks against flushes initiations 926 * (note: but will run concurrently with the actual flush). 927 */ 928 hammer2_trans_init(&trans, ip->pmp, 0); 929 error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount); 930 hammer2_trans_done(&trans); 931 932 return (error); 933 } 934 935 /* 936 * Perform read operations on a file or symlink given an UNLOCKED 937 * inode and uio. 938 * 939 * The passed ip is not locked. 940 */ 941 static 942 int 943 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 944 { 945 hammer2_off_t size; 946 struct buf *bp; 947 int error; 948 949 error = 0; 950 951 /* 952 * UIO read loop. 953 * 954 * WARNING! 
Assumes that the kernel interlocks size changes at the
	 *	    vnode level.
	 */
	hammer2_mtx_sh(&ip->lock);
	size = ip->size;
	hammer2_mtx_unlock(&ip->lock);

	while (uio->uio_resid > 0 && uio->uio_offset < size) {
		hammer2_key_t lbase;
		hammer2_key_t leof;
		int lblksize;
		int loff;
		int n;

		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, &leof);

		error = cluster_read(ip->vp, leof, lbase, lblksize,
				     uio->uio_resid, seqcount * BKVASIZE,
				     &bp);

		if (error)
			break;
		/* copy out at most to block end, uio residual, or EOF */
		loff = (int)(uio->uio_offset - lbase);
		n = lblksize - loff;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > size - uio->uio_offset)
			n = (int)(size - uio->uio_offset);
		bp->b_flags |= B_AGE;
		uiomove((char *)bp->b_data + loff, n, uio);
		bqrelse(bp);
	}
	return (error);
}

/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip,
		   struct uio *uio, int ioflag, int seqcount)
{
	hammer2_key_t old_eof;
	hammer2_key_t new_eof;
	struct buf *bp;
	int kflags;
	int error;
	int modified;

	/*
	 * Setup if append
	 *
	 * WARNING! Assumes that the kernel interlocks size changes at the
	 *	    vnode level.
	 */
	hammer2_mtx_ex(&ip->lock);
	if (ioflag & IO_APPEND)
		uio->uio_offset = ip->size;
	old_eof = ip->size;
	hammer2_mtx_unlock(&ip->lock);

	/*
	 * Extend the file if necessary.  If the write fails at some point
	 * we will truncate it back down to cover as much as we were able
	 * to write.
	 *
	 * Doing this now makes it easier to calculate buffer sizes in
	 * the loop.
	 */
	kflags = 0;
	error = 0;
	modified = 0;

	if (uio->uio_offset + uio->uio_resid > old_eof) {
		new_eof = uio->uio_offset + uio->uio_resid;
		modified = 1;
		hammer2_extend_file(ip, new_eof);
		kflags |= NOTE_EXTEND;
	} else {
		new_eof = old_eof;
	}

	/*
	 * UIO write loop
	 */
	while (uio->uio_resid > 0) {
		hammer2_key_t lbase;
		int trivial;
		int endofblk;
		int lblksize;
		int loff;
		int n;

		/*
		 * Don't allow the buffer build to blow out the buffer
		 * cache.
		 */
		if ((ioflag & IO_RECURSE) == 0)
			bwillwrite(HAMMER2_PBUFSIZE);

		/*
		 * This nominally tells us how much we can cluster and
		 * what the logical buffer size needs to be.  Currently
		 * we don't try to cluster the write and just handle one
		 * block at a time.
		 */
		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, NULL);
		loff = (int)(uio->uio_offset - lbase);

		KKASSERT(lblksize <= 65536);

		/*
		 * Calculate bytes to copy this transfer and whether the
		 * copy completely covers the buffer or not.
		 */
		trivial = 0;
		n = lblksize - loff;
		if (n > uio->uio_resid) {
			n = uio->uio_resid;
			/*
			 * NOTE(review): 'loff == lbase' only matches when
			 * both are 0; this looks intended as 'loff == 0'
			 * (write starts at block base and ends at the new
			 * EOF) -- confirm against upstream.
			 */
			if (loff == lbase && uio->uio_offset + n == new_eof)
				trivial = 1;
			endofblk = 0;
		} else {
			if (loff == 0)
				trivial = 1;
			endofblk = 1;
		}

		/*
		 * Get the buffer
		 */
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ip->vp, lbase, lblksize, &bp);
			}
		} else if (trivial) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 *
			 * (The strategy code will detect zero-fill physical
			 * blocks for this case).
			 */
			error = bread(ip->vp, lbase, lblksize, &bp);
			if (error == 0)
				bheavy(bp);
		}

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Ok, copy the data in
		 */
		error = uiomove(bp->b_data + loff, n, uio);
		kflags |= NOTE_WRITE;
		modified = 1;
		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
		 *	    with IO_SYNC or IO_ASYNC set.  These writes
		 *	    must be handled as the pageout daemon expects.
		 */
		if (ioflag & IO_SYNC) {
			bwrite(bp);
		} else if ((ioflag & IO_DIRECT) && endofblk) {
			bawrite(bp);
		} else if (ioflag & IO_ASYNC) {
			bawrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/*
	 * Cleanup.  If we extended the file EOF but failed to write through
	 * the entire write is a failure and we have to back-up.
	 */
	if (error && new_eof != old_eof) {
		hammer2_truncate_file(ip, old_eof);
	} else if (modified) {
		hammer2_mtx_ex(&ip->lock);
		hammer2_update_time(&ip->mtime);
		atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME);
		hammer2_mtx_unlock(&ip->lock);
	}
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	hammer2_knote(ip->vp, kflags);
	vsetisdirty(ip->vp);

	return error;
}

/*
 * Truncate the size of a file.  The inode must not be locked.
 *
 * NOTE:	Caller handles setting HAMMER2_INODE_MODIFIED
 *
 * WARNING: nvtruncbuf() can only be safely called without the inode lock
 *	    held due to the way our write thread works.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *	    vnode level.
 */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	int nblksize;

	LOCKSTART;
	if (ip->vp) {
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvtruncbuf(ip->vp, nsize,
			   nblksize, (int)nsize & (nblksize - 1),
			   0);
	}
	hammer2_mtx_ex(&ip->lock);
	ip->size = nsize;
	atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
	hammer2_mtx_unlock(&ip->lock);
	LOCKSTOP;
}

/*
 * Extend the size of a file.  The inode must not be locked.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *	    vnode level.
 *
 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED
 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	hammer2_key_t osize;
	int oblksize;
	int nblksize;

	LOCKSTART;
	/* Update the in-memory size first, under the inode lock. */
	hammer2_mtx_ex(&ip->lock);
	osize = ip->size;
	ip->size = nsize;
	hammer2_mtx_unlock(&ip->lock);

	if (ip->vp) {
		/*
		 * Extend the buffer cache view of the file using the old
		 * and new logical block sizes.  Called without ip->lock
		 * held (same constraint as the truncation path).
		 */
		oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvextendbuf(ip->vp,
			    osize, nsize,
			    oblksize, nblksize,
			    -1, -1, 0);
	}
	atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
	LOCKSTOP;
}

/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 *
 * Resolve a filename in directory (dvp) via a dirhash-keyed cluster
 * lookup, resolving hardlink entries to their target inode as needed.
 */
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_inode_t *dip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	const hammer2_inode_data_t *ripdata;
	hammer2_key_t key_next;
	hammer2_key_t lhc;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error = 0;
	struct vnode *vp;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	lhc = hammer2_dirhash(name, name_len);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 */
	cparent = hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS |
					  HAMMER2_RESOLVE_SHARED);

	/*
	 * Scan all entries sharing this dirhash for an exact name match.
	 * Multiple names can collide into the same lhc range.
	 */
	cluster = hammer2_cluster_lookup(cparent, &key_next,
					 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
					 HAMMER2_LOOKUP_SHARED);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			if (ripdata->name_len == name_len &&
			    bcmp(ripdata->filename, name, name_len) == 0) {
				break;
			}
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK,
					       HAMMER2_LOOKUP_SHARED);
	}
	hammer2_inode_unlock(dip, cparent);

	/*
	 * Resolve hardlink entries before acquiring the inode.
	 */
	if (cluster) {
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
			hammer2_tid_t inum = ripdata->inum;
			error = hammer2_hardlink_find(dip, NULL, &cluster);
			if (error) {
				kprintf("hammer2: unable to find hardlink "
					"0x%016jx\n", inum);
				/*
				 * NOTE(review): returns without further
				 * cleanup of cluster here; presumably
				 * hammer2_hardlink_find() disposed of it on
				 * error -- confirm against its contract.
				 */
				LOCKSTOP;

				return error;
			}
		}
	}

	/*
	 * nresolve needs to resolve hardlinks, the original cluster is not
	 * sufficient.
	 */
	if (cluster) {
		ip = hammer2_inode_get(dip->pmp, dip, cluster);
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
			kprintf("nresolve: fixup hardlink\n");
			/*
			 * Re-lock the inode exclusively and reacquire its
			 * cluster; the extra ref keeps ip alive across the
			 * unlock/relock window.
			 */
			hammer2_inode_ref(ip);
			hammer2_inode_unlock(ip, NULL);
			hammer2_cluster_unlock(cluster);
			hammer2_cluster_drop(cluster);
			cluster = hammer2_inode_lock(ip,
						     HAMMER2_RESOLVE_ALWAYS);
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			hammer2_inode_drop(ip);
			kprintf("nresolve: fixup to type %02x\n",
				ripdata->type);
		}
	} else {
		ip = NULL;
	}

#if 0
	/*
	 * Deconsolidate any hardlink whos nlinks == 1.  Ignore errors.
	 * If an error occurs chain and ip are left alone.
	 *
	 * XXX upgrade shared lock?
	 */
	if (ochain && chain &&
	    chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) {
		kprintf("hammer2: need to unconsolidate hardlink for %s\n",
			chain->data->ipdata.filename);
		/* XXX retain shared lock on dip? (currently not held) */
		hammer2_trans_init(&trans, dip->pmp, 0);
		hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain);
		hammer2_trans_done(&trans);
	}
#endif

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *	 entry to NULL, otherwise we just return the error and
	 *	 leave the namecache unresolved.
	 *
	 * NOTE: multiple hammer2_inode structures can be aliased to the
	 *	 same chain element, for example for hardlinks.  This
	 *	 use case does not 'reattach' inode associations that
	 *	 might already exist, but always allocates a new one.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *	    but chain was locked shared.  inode_unlock()
	 *	    will handle it properly.
	 */
	if (cluster) {
		vp = hammer2_igetv(ip, cluster, &error);
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
		} else if (error == ENOENT) {
			cache_setvp(ap->a_nch, NULL);
		}
		hammer2_inode_unlock(ip, cluster);

		/*
		 * The vp should not be released until after we've disposed
		 * of our locks, because it might cause vop_inactive() to
		 * be called.
		 */
		if (vp)
			vrele(vp);
	} else {
		error = ENOENT;
		cache_setvp(ap->a_nch, NULL);
	}
	KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
		("resolve error %d/%p ap %p\n",
		 error, ap->a_nch->ncp->nc_vp, ap));
	LOCKSTOP;
	return error;
}

/*
 * hammer2_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Return a vnode for the parent directory (dip->pip), or ENOENT if the
 * directory has no parent linkage.
 */
static
int
hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);

	if ((ip = dip->pip) == NULL) {
		*ap->a_vpp = NULL;
		LOCKSTOP;
		return ENOENT;
	}
	cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	*ap->a_vpp = hammer2_igetv(ip, cparent, &error);
	hammer2_inode_unlock(ip, cparent);

	LOCKSTOP;
	return error;
}

/*
 * hammer2_vop_nmkdir { nch, dvp, vpp, cred, vap }
 *
 * Create a new inode described by *a_vap in directory (dvp) and return
 * its vnode.
 */
static
int
hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_trans_t trans;
	hammer2_cluster_t *cluster;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return (EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	cluster = NULL;

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len,
				   &cluster, 0, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
	} else {
		*ap->a_vpp = hammer2_igetv(nip, cluster, &error);
		hammer2_inode_unlock(nip, cluster);
	}
	hammer2_trans_done(&trans);

	/* Only resolve the namecache entry on full success. */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
	}
	LOCKSTOP;
	return error;
}

/*
 * Return the
largest contiguous physical disk range for the logical 1470 * request, in bytes. 1471 * 1472 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1473 * 1474 * Basically disabled, the logical buffer write thread has to deal with 1475 * buffers one-at-a-time. 1476 */ 1477 static 1478 int 1479 hammer2_vop_bmap(struct vop_bmap_args *ap) 1480 { 1481 *ap->a_doffsetp = NOOFFSET; 1482 if (ap->a_runp) 1483 *ap->a_runp = 0; 1484 if (ap->a_runb) 1485 *ap->a_runb = 0; 1486 return (EOPNOTSUPP); 1487 } 1488 1489 static 1490 int 1491 hammer2_vop_open(struct vop_open_args *ap) 1492 { 1493 return vop_stdopen(ap); 1494 } 1495 1496 /* 1497 * hammer2_vop_advlock { vp, id, op, fl, flags } 1498 */ 1499 static 1500 int 1501 hammer2_vop_advlock(struct vop_advlock_args *ap) 1502 { 1503 hammer2_inode_t *ip = VTOI(ap->a_vp); 1504 const hammer2_inode_data_t *ripdata; 1505 hammer2_cluster_t *cparent; 1506 hammer2_off_t size; 1507 1508 cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS | 1509 HAMMER2_RESOLVE_SHARED); 1510 ripdata = &hammer2_cluster_rdata(cparent)->ipdata; 1511 size = ripdata->size; 1512 hammer2_inode_unlock(ip, cparent); 1513 return (lf_advlock(ap, &ip->advlock, size)); 1514 } 1515 1516 1517 static 1518 int 1519 hammer2_vop_close(struct vop_close_args *ap) 1520 { 1521 return vop_stdclose(ap); 1522 } 1523 1524 /* 1525 * hammer2_vop_nlink { nch, dvp, vp, cred } 1526 * 1527 * Create a hardlink from (vp) to {dvp, nch}. 
 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_inode_t *fdip;	/* current parent of ip (ip->pip) */
	hammer2_inode_t *tdip;	/* target directory to create link in */
	hammer2_inode_t *cdip;	/* common parent directory */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *fdcluster;
	hammer2_cluster_t *tdcluster;
	hammer2_cluster_t *cdcluster;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	tdip = VTOI(ap->a_dvp);
	if (tdip->pmp->ronly) {
		LOCKSTOP;
		return (EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * ip represents the file being hardlinked.  The file could be a
	 * normal file or a hardlink target if it has already been hardlinked.
	 * If ip is a hardlinked target then ip->pip represents the location
	 * of the hardlinked target, NOT the location of the hardlink pointer.
	 *
	 * Bump nlinks and potentially also create or move the hardlink
	 * target in the parent directory common to (ip) and (tdip).  The
	 * consolidation code can modify ip->cluster and ip->pip.  The
	 * returned cluster is locked.
	 */
	ip = VTOI(ap->a_vp);
	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_NEWINODE);

	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 */
	fdip = ip->pip;
	cdip = hammer2_inode_common_parent(fdip, tdip);
	cdcluster = hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS);
	fdcluster = hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS);
	tdcluster = hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS);
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
					     cdip, cdcluster, 1);
	if (error)
		goto done;

	/*
	 * Create a directory entry connected to the specified cluster.
	 *
	 * WARNING! chain can get moved by the connect (indirectly due to
	 *	    potential indirect block creation).
	 */
	error = hammer2_inode_connect(&trans, &cluster, 1,
				      tdip, tdcluster,
				      name, name_len, 0);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
done:
	/* Unlock in reverse of the acquisition order above. */
	hammer2_inode_unlock(ip, cluster);
	hammer2_inode_unlock(tdip, tdcluster);
	hammer2_inode_unlock(fdip, fdcluster);
	hammer2_inode_unlock(cdip, cdcluster);
	hammer2_inode_drop(cdip);	/* ref from inode_common_parent() */
	hammer2_trans_done(&trans);

	LOCKSTOP;
	return error;
}

/*
 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer2_vop_ncreate(struct vop_ncreate_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_trans_t trans;
	hammer2_cluster_t *ncluster;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return (EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
	ncluster = NULL;

	/* Create the new inode and acquire its vnode on success. */
	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len,
				   &ncluster, 0, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
	} else {
		*ap->a_vpp = hammer2_igetv(nip, ncluster, &error);
		hammer2_inode_unlock(nip, ncluster);
	}
	hammer2_trans_done(&trans);

	/* Only resolve the namecache entry on full success. */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
	}
	LOCKSTOP;
	return error;
}

/*
 * Make a device node (typically a fifo)
 */
static
int
hammer2_vop_nmknod(struct vop_nmknod_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_trans_t trans;
	hammer2_cluster_t *ncluster;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return (EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
	ncluster = NULL;

	/* Same create/igetv pattern as hammer2_vop_ncreate(). */
	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len,
				   &ncluster, 0, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
	} else {
		*ap->a_vpp = hammer2_igetv(nip, ncluster, &error);
		hammer2_inode_unlock(nip, ncluster);
	}
	hammer2_trans_done(&trans);

	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
	}
	LOCKSTOP;
	return error;
}

/*
 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 */
static
int
hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_cluster_t *ncparent;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
	ncparent = NULL;

	ap->a_vap->va_type = VLNK;	/* enforce type */

	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len,
				   &ncparent, 0, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
		hammer2_trans_done(&trans);
		return error;
	}
	*ap->a_vpp = hammer2_igetv(nip, ncparent, &error);

	/*
	 * Build the softlink (~like file data) and finalize the namecache.
	 */
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;
		hammer2_inode_data_t *nipdata;

		nipdata = &hammer2_cluster_wdata(ncparent)->ipdata;
		/* nipdata = &nip->chain->data->ipdata;XXX */
		bytes = strlen(ap->a_target);

		if (bytes <= HAMMER2_EMBEDDED_BYTES) {
			/*
			 * Short target: store the link text directly in the
			 * inode's embedded data area.
			 */
			KKASSERT(nipdata->op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			bcopy(ap->a_target, nipdata->u.data, bytes);
			nipdata->size = bytes;
			nip->size = bytes;
			hammer2_cluster_modsync(ncparent);
			hammer2_inode_unlock(nip, ncparent);
			/* nipdata = NULL; not needed */
		} else {
			/*
			 * Long target: write the link text as file data via
			 * a synthesized system-space uio.
			 */
			hammer2_inode_unlock(nip, ncparent);
			/* nipdata = NULL; not needed */
			bzero(&auio, sizeof(auio));
			bzero(&aiov, sizeof(aiov));
			auio.uio_iov = &aiov;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_WRITE;
			auio.uio_resid = bytes;
			auio.uio_iovcnt = 1;
			auio.uio_td = curthread;
			aiov.iov_base = ap->a_target;
			aiov.iov_len = bytes;
			error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
			/* XXX handle error */
			error = 0;
		}
	} else {
		hammer2_inode_unlock(nip, ncparent);
	}
	hammer2_trans_done(&trans);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		/* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
	}
	return error;
}

/*
 * hammer2_vop_nremove { nch, dvp, cred }
 */
static
int
hammer2_vop_nremove(struct vop_nremove_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, 0);
	error = hammer2_unlink_file(&trans, dip, name, name_len,
				    0, NULL, ap->a_nch, -1);
	/* Flush any deferred deletions of open-but-unlinked files. */
	hammer2_run_unlinkq(&trans, dip->pmp);
	hammer2_trans_done(&trans);
	if (error == 0)
		cache_unlink(ap->a_nch);
	LOCKSTOP;
	return (error);
}

/*
 * hammer2_vop_nrmdir { nch, dvp, cred }
 */
static
int
hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, 0);
	hammer2_run_unlinkq(&trans, dip->pmp);
	/* isdir=1: the entry must be a directory. */
	error = hammer2_unlink_file(&trans, dip, name, name_len,
				    1, NULL, ap->a_nch, -1);
	hammer2_trans_done(&trans);
	if (error == 0)
		cache_unlink(ap->a_nch);
	LOCKSTOP;
	return (error);
}

/*
 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 */
static
int
hammer2_vop_nrename(struct vop_nrename_args *ap)
{
	struct namecache *fncp;
	struct namecache *tncp;
	hammer2_inode_t *cdip;
	hammer2_inode_t *fdip;
	hammer2_inode_t *tdip;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *fdcluster;
	hammer2_cluster_t *tdcluster;
	hammer2_cluster_t *cdcluster;
	hammer2_trans_t trans;
	const uint8_t *fname;
	size_t fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;
	int tnch_error;
	int hlink;

	/* Cross-mount renames are not supported. */
	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	if (fdip->pmp->ronly)
		return(EROFS);

	LOCKSTART;
	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	hammer2_pfs_memory_wait(tdip->pmp);
	hammer2_trans_init(&trans, tdip->pmp, 0);

	/*
	 * ip is the inode being renamed.  If this is a hardlink then
	 * ip represents the actual file and not the hardlink marker.
	 */
	ip = VTOI(fncp->nc_vp);
	cluster = NULL;


	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 *
	 * WARNING! fdip may not match ip->pip.  That is, if the source file
	 *	    is already a hardlink then what we are renaming is the
	 *	    hardlink pointer, not the hardlink itself.  The hardlink
	 *	    directory (ip->pip) will already be at a common parent
	 *	    of fdrip.
	 *
	 *	    Be sure to use ip->pip when finding the common parent
	 *	    against tdip or we might accidently move the hardlink
	 *	    target into a subdirectory that makes it inaccessible to
	 *	    other pointers.
	 */
	cdip = hammer2_inode_common_parent(ip->pip, tdip);
	cdcluster = hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS);
	fdcluster = hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS);
	tdcluster = hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS);

	/*
	 * Keep a tight grip on the inode so the temporary unlinking from
	 * the source location prior to linking to the target location
	 * does not cause the cluster to be destroyed.
	 *
	 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
	 *	 unlinking elements from their directories.  Locking
	 *	 the nlinks field does not lock the whole inode.
	 */
	hammer2_inode_ref(ip);

	/*
	 * Remove target if it exists.
	 */
	error = hammer2_unlink_file(&trans, tdip, tname, tname_len,
				    -1, NULL, ap->a_tnch, -1);
	tnch_error = error;
	if (error && error != ENOENT)
		goto done;

	/*
	 * When renaming a hardlinked file we may have to re-consolidate
	 * the location of the hardlink target.
	 *
	 * If ip represents a regular file the consolidation code essentially
	 * does nothing other than return the same locked cluster that was
	 * passed in.
	 *
	 * The returned cluster will be locked.
	 *
	 * WARNING!  We do not currently have a local copy of ipdata but
	 *	     we do use one later remember that it must be reloaded
	 *	     on any modification to the inode, including connects.
	 */
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
					     cdip, cdcluster, 0);
	if (error)
		goto done;

	/*
	 * Disconnect (fdip, fname) from the source directory.  This will
	 * disconnect (ip) if it represents a direct file.  If (ip) represents
	 * a hardlink the HARDLINK pointer object will be removed but the
	 * hardlink will stay intact.
	 *
	 * Always pass nch as NULL because we intend to reconnect the inode,
	 * so we don't want hammer2_unlink_file() to rename it to the hidden
	 * open-but-unlinked directory.
	 *
	 * The target cluster may be marked DELETED but will not be destroyed
	 * since we retain our hold on ip and cluster.
	 *
	 * NOTE: We pass nlinks as 0 (not -1) in order to retain the file's
	 *	 link count.
	 */
	error = hammer2_unlink_file(&trans, fdip, fname, fname_len,
				    -1, &hlink, NULL, 0);
	KKASSERT(error != EAGAIN);
	if (error)
		goto done;

	/*
	 * Reconnect ip to target directory using cluster.  Chains cannot
	 * actually be moved, so this will duplicate the cluster in the new
	 * spot and assign it to the ip, replacing the old cluster.
	 *
	 * WARNING: Because recursive locks are allowed and we unlinked the
	 *	    file that we have a cluster-in-hand for just above, the
	 *	    cluster might have been delete-duplicated.  We must
	 *	    refactor the cluster.
	 *
	 * WARNING: Chain locks can lock buffer cache buffers, to avoid
	 *	    deadlocks we want to unlock before issuing a cache_*()
	 *	    op (that might have to lock a vnode).
	 *
	 * NOTE: Pass nlinks as 0 because we retained the link count from
	 *	 the unlink, so we do not have to modify it.
	 */
	error = hammer2_inode_connect(&trans, &cluster, hlink,
				      tdip, tdcluster,
				      tname, tname_len, 0);
	if (error == 0) {
		KKASSERT(cluster != NULL);
		hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), cluster);
	}
done:
	/* Unlock in reverse of the acquisition order above. */
	hammer2_inode_unlock(ip, cluster);
	hammer2_inode_unlock(tdip, tdcluster);
	hammer2_inode_unlock(fdip, fdcluster);
	hammer2_inode_unlock(cdip, cdcluster);
	hammer2_inode_drop(ip);		/* ref taken above */
	hammer2_inode_drop(cdip);	/* ref from inode_common_parent() */
	hammer2_run_unlinkq(&trans, fdip->pmp);
	hammer2_trans_done(&trans);

	/*
	 * Issue the namecache update after unlocking all the internal
	 * hammer structures, otherwise we might deadlock.
	 */
	if (tnch_error == 0) {
		cache_unlink(ap->a_tnch);
		cache_setunresolved(ap->a_tnch);
	}
	if (error == 0)
		cache_rename(ap->a_fnch, ap->a_tnch);

	LOCKSTOP;
	return (error);
}

/*
 * Strategy code (async logical file buffer I/O from system)
 *
 * WARNING: The strategy code cannot safely use hammer2 transactions
 *	    as this can deadlock against vfs_sync's vfsync() call
 *	    if multiple flushes are queued.  All H2 structures must
 *	    already be present and ready for the DIO.
 *
 *	    Reads can be initiated asynchronously, writes have to be
 *	    spooled to a separate thread for action to avoid deadlocks.
 */
static int hammer2_strategy_read(struct vop_strategy_args *ap);
static int hammer2_strategy_write(struct vop_strategy_args *ap);
static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);

/*
 * Dispatch a logical buffer to the read or write strategy path.
 */
static
int
hammer2_vop_strategy(struct vop_strategy_args *ap)
{
	struct bio *biop;
	struct buf *bp;
	int error;

	biop = ap->a_bio;
	bp = biop->bio_buf;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		error = hammer2_strategy_read(ap);
		++hammer2_iod_file_read;
		break;
	case BUF_CMD_WRITE:
		error = hammer2_strategy_write(ap);
		++hammer2_iod_file_write;
		break;
	default:
		/* Unsupported command; fail the bio immediately. */
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(biop);
		break;
	}
	return (error);
}

/*
 * Logical buffer I/O, async read.
 */
static
int
hammer2_strategy_read(struct vop_strategy_args *ap)
{
	struct buf *bp;
	struct bio *bio;
	struct bio *nbio;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lbase;
	uint8_t btype;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	nbio = push_bio(bio);

	lbase = bio->bio_offset;
	KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);

	/*
	 * Lookup the file offset.
	 */
	cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
					 HAMMER2_RESOLVE_SHARED);
	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
					 lbase, lbase,
					 HAMMER2_LOOKUP_NODATA |
					 HAMMER2_LOOKUP_SHARED);
	hammer2_inode_unlock(ip, cparent);

	/*
	 * Data is zero-fill if no cluster could be found
	 * (XXX or EIO on a cluster failure).
	 */
	if (cluster == NULL) {
		bp->b_resid = 0;
		bp->b_error = 0;
		bzero(bp->b_data, bp->b_bcount);
		biodone(nbio);
		return(0);
	}

	/*
	 * Cluster elements must be type INODE or type DATA, but the
	 * compression mode (or not) for DATA chains can be different for
	 * each chain.  This will be handled by the callback.
	 *
	 * If the cluster already has valid data the callback will be made
	 * immediately/synchronously.
	 */
	btype = hammer2_cluster_type(cluster);
	if (btype != HAMMER2_BREF_TYPE_INODE &&
	    btype != HAMMER2_BREF_TYPE_DATA) {
		panic("READ PATH: hammer2_strategy_read: unknown bref type");
	}
	hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
				   nbio);
	return(0);
}

/*
 * Read callback for hammer2_cluster_load_async().  The load function may
 * start several actual I/Os but will only make one callback, typically with
 * the first valid I/O XXX
 */
static
void
hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
{
	struct bio *bio = iocb->ptr;	/* original logical buffer */
	struct buf *bp = bio->bio_buf;	/* original logical buffer */
	hammer2_chain_t *chain;
	hammer2_cluster_t *cluster;
	hammer2_io_t *dio;
	char *data;
	int i;

	/*
	 * Extract data and handle iteration on I/O failure.  iocb->off
	 * is the cluster index for iteration.
	 */
	cluster = iocb->cluster;
	dio = iocb->dio;	/* can be NULL if iocb not in progress */

	/*
	 * Work to do if INPROG set, else dio is already good or dio is
	 * NULL (which is the shortcut case if chain->data is already good).
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		/*
		 * Read attempt not yet made.  Issue an asynchronous read
		 * if necessary and return, operation will chain back to
		 * this function.
		 */
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (dio->bp == NULL ||
			    (dio->bp->b_flags & B_CACHE) == 0) {
				if (dio->bp) {
					bqrelse(dio->bp);
					dio->bp = NULL;
				}
				iocb->flags |= HAMMER2_IOCB_READ;
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			}
		}
	}

	/*
	 * If we have a DIO it is now done, check for an error and
	 * calculate the data.
	 *
	 * If there is no DIO it is an optimization by
	 * hammer2_cluster_load_async(), the data is available in
	 * chain->data.
	 */
	if (dio) {
		if (dio->bp->b_flags & B_ERROR) {
			/*
			 * Device error on this cluster element; try the
			 * next chain in the cluster, or fail the logical
			 * buffer when all elements are exhausted.
			 */
			i = (int)iocb->lbase + 1;
			if (i >= cluster->nchains) {
				bp->b_flags |= B_ERROR;
				bp->b_error = dio->bp->b_error;
				hammer2_io_complete(iocb);
				biodone(bio);
				hammer2_cluster_unlock(cluster);
				hammer2_cluster_drop(cluster);
			} else {
				hammer2_io_complete(iocb); /* XXX */
				chain = cluster->array[i].chain;
				kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
				hammer2_adjreadcounter(&chain->bref,
						       chain->bytes);
				/* Re-arm the iocb for the next element. */
				iocb->chain = chain;
				iocb->lbase = (off_t)i;
				iocb->flags = 0;
				iocb->error = 0;
				hammer2_io_getblk(chain->hmp,
						  chain->bref.data_off,
						  chain->bytes,
						  iocb);
			}
			return;
		}
		chain = iocb->chain;
		data = hammer2_io_data(dio, chain->bref.data_off);
	} else {
		/*
		 * Special synchronous case, data present in chain->data.
		 */
		chain = iocb->chain;
		data = (void *)chain->data;
	}

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 */
		bcopy(((hammer2_inode_data_t *)data)->u.data,
		      bp->b_data, HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
		bp->b_resid = 0;
		bp->b_error = 0;
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
		case HAMMER2_COMP_LZ4:
			hammer2_decompress_LZ4_callback(data, chain->bytes,
							bio);
			break;
		case HAMMER2_COMP_ZLIB:
			hammer2_decompress_ZLIB_callback(data, chain->bytes,
							 bio);
			break;
		case HAMMER2_COMP_NONE:
			KKASSERT(chain->bytes <= bp->b_bcount);
			bcopy(data, bp->b_data, chain->bytes);
			if (chain->bytes < bp->b_bcount) {
				/* Zero-fill the remainder of the buffer. */
				bzero(bp->b_data + chain->bytes,
				      bp->b_bcount - chain->bytes);
			}
			bp->b_flags |= B_NOTMETA;
			bp->b_resid = 0;
			bp->b_error = 0;
			break;
		default:
			panic("hammer2_strategy_read: "
			      "unknown compression type");
		}
	} else {
		/* bqrelse the dio to help stabilize the call to panic() */
		if (dio)
			hammer2_io_bqrelse(&dio);
		panic("hammer2_strategy_read: unknown bref type");
	}

	/*
	 * Once the iocb is cleaned up the DIO (if any) will no longer be
	 * in-progress but will still have a ref.  Be sure to release
	 * the ref.
 */
	hammer2_io_complete(iocb);		/* physical management */
	if (dio)				/* physical dio & buffer */
		hammer2_io_bqrelse(&dio);
	hammer2_cluster_unlock(cluster);	/* cluster management */
	hammer2_cluster_drop(cluster);		/* cluster management */
	biodone(bio);				/* logical buffer */
}

/*
 * Logical buffer write.  Queue the bio to the PFS write thread and wait
 * for the in-progress count to drain (see WARNING above: the strategy
 * code cannot use transactions directly).
 */
static
int
hammer2_strategy_write(struct vop_strategy_args *ap)
{
	hammer2_pfs_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_inode_t *ip;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	pmp = ip->pmp;

	hammer2_lwinprog_ref(pmp);
	hammer2_mtx_ex(&pmp->wthread_mtx);
	if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
		/* Queue was empty; the write thread may be asleep. */
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		hammer2_mtx_unlock(&pmp->wthread_mtx);
		wakeup(&pmp->wthread_bioq);
	} else {
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		hammer2_mtx_unlock(&pmp->wthread_mtx);
	}
	hammer2_lwinprog_wait(pmp);

	return(0);
}

/*
 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer2_vop_ioctl(struct vop_ioctl_args *ap)
{
	hammer2_inode_t *ip;
	int error;

	LOCKSTART;
	ip = VTOI(ap->a_vp);

	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
			      ap->a_fflag, ap->a_cred);
	LOCKSTOP;
	return (error);
}

/*
 * Mount control; only NFS export configuration is handled locally, the
 * rest is punted to the standard handler.
 */
static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	hammer2_pfs_t *pmp;
	int rc;

	LOCKSTART;
	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		mp = ap->a_head.a_ops->head.vv_mount;
		pmp = MPTOPMP(mp);

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = (EINVAL);
		else
			rc = vfs_export(mp, &pmp->export,
					(const struct export_args *)ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}
	LOCKSTOP;
	return (rc);
}

/*
 * This handles unlinked open files after the vnode is finally dereferenced.
 * To avoid deadlocks it cannot be called from the normal vnode recycling
 * path, so we call it (1) after a unlink, rmdir, or rename, (2) on every
 * flush, and (3) on umount.
 */
void
hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfs_t *pmp)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_unlink_t *ipul;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;

	/* Unlocked early-out check; the queue is rechecked under the spin. */
	if (TAILQ_EMPTY(&pmp->unlinkq))
		return;

	LOCKSTART;
	hammer2_spin_ex(&pmp->list_spin);
	while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) {
		TAILQ_REMOVE(&pmp->unlinkq, ipul, entry);
		/* Drop the spinlock before blocking operations below. */
		hammer2_spin_unex(&pmp->list_spin);
		ip = ipul->ip;
		kfree(ipul, pmp->minode);

		cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		if (hammer2_debug & 0x400) {
			kprintf("hammer2: unlink on reclaim: %s refs=%d\n",
				ripdata->filename, ip->refs);
		}
		KKASSERT(ripdata->nlinks == 0);

		/* Permanently delete the inode's cluster from its parent. */
		cparent = hammer2_cluster_parent(cluster);
		hammer2_cluster_delete(trans, cparent, cluster,
				       HAMMER2_DELETE_PERMANENT);
		hammer2_cluster_unlock(cparent);
		hammer2_cluster_drop(cparent);
		hammer2_inode_unlock(ip, cluster);	/* inode lock */
		hammer2_inode_drop(ip);			/* ipul ref */

		hammer2_spin_ex(&pmp->list_spin);
	}
	hammer2_spin_unex(&pmp->list_spin);
	LOCKSTOP;
}


/*
 * KQFILTER
 */
static void filt_hammer2detach(struct knote *kn);
static int filt_hammer2read(struct knote *kn, long hint);
static int filt_hammer2write(struct knote *kn, long hint);
static int filt_hammer2vnode(struct knote *kn, long hint);

static struct filterops hammer2read_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2read };
static struct filterops hammer2write_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2write };
static struct filterops hammer2vnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2vnode };

/*
 * Attach a knote of the requested filter type to the vnode.
 */
static
int
hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &hammer2read_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &hammer2write_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &hammer2vnode_filtops;
		break;
	default:
		return (EOPNOTSUPP);
	}

	kn->kn_hook = (caddr_t)vp;

	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

	return(0);
}

static void
filt_hammer2detach(struct knote *kn)
{
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}

/*
 * EVFILT_READ: report bytes available between the file offset and EOF.
 */
static int
filt_hammer2read(struct knote *kn, long hint)
{
	struct vnode *vp = (void *)kn->kn_hook;
	hammer2_inode_t *ip = VTOI(vp);
	off_t off;

	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
		return(1);
	}
	off = ip->size - kn->kn_fp->f_offset;
	kn->kn_data = (off < INTPTR_MAX) ?
off : INTPTR_MAX; 2530 if (kn->kn_sfflags & NOTE_OLDAPI) 2531 return(1); 2532 return (kn->kn_data != 0); 2533 } 2534 2535 2536 static int 2537 filt_hammer2write(struct knote *kn, long hint) 2538 { 2539 if (hint == NOTE_REVOKE) 2540 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2541 kn->kn_data = 0; 2542 return (1); 2543 } 2544 2545 static int 2546 filt_hammer2vnode(struct knote *kn, long hint) 2547 { 2548 if (kn->kn_sfflags & hint) 2549 kn->kn_fflags |= hint; 2550 if (hint == NOTE_REVOKE) { 2551 kn->kn_flags |= (EV_EOF | EV_NODATA); 2552 return (1); 2553 } 2554 return (kn->kn_fflags != 0); 2555 } 2556 2557 /* 2558 * FIFO VOPS 2559 */ 2560 static 2561 int 2562 hammer2_vop_markatime(struct vop_markatime_args *ap) 2563 { 2564 hammer2_inode_t *ip; 2565 struct vnode *vp; 2566 2567 vp = ap->a_vp; 2568 ip = VTOI(vp); 2569 2570 if (ip->pmp->ronly) 2571 return(EROFS); 2572 return(0); 2573 } 2574 2575 static 2576 int 2577 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap) 2578 { 2579 int error; 2580 2581 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 2582 if (error) 2583 error = hammer2_vop_kqfilter(ap); 2584 return(error); 2585 } 2586 2587 /* 2588 * VOPS vector 2589 */ 2590 struct vop_ops hammer2_vnode_vops = { 2591 .vop_default = vop_defaultop, 2592 .vop_fsync = hammer2_vop_fsync, 2593 .vop_getpages = vop_stdgetpages, 2594 .vop_putpages = vop_stdputpages, 2595 .vop_access = hammer2_vop_access, 2596 .vop_advlock = hammer2_vop_advlock, 2597 .vop_close = hammer2_vop_close, 2598 .vop_nlink = hammer2_vop_nlink, 2599 .vop_ncreate = hammer2_vop_ncreate, 2600 .vop_nsymlink = hammer2_vop_nsymlink, 2601 .vop_nremove = hammer2_vop_nremove, 2602 .vop_nrmdir = hammer2_vop_nrmdir, 2603 .vop_nrename = hammer2_vop_nrename, 2604 .vop_getattr = hammer2_vop_getattr, 2605 .vop_setattr = hammer2_vop_setattr, 2606 .vop_readdir = hammer2_vop_readdir, 2607 .vop_readlink = hammer2_vop_readlink, 2608 .vop_getpages = vop_stdgetpages, 2609 .vop_putpages = vop_stdputpages, 2610 .vop_read = 
hammer2_vop_read, 2611 .vop_write = hammer2_vop_write, 2612 .vop_open = hammer2_vop_open, 2613 .vop_inactive = hammer2_vop_inactive, 2614 .vop_reclaim = hammer2_vop_reclaim, 2615 .vop_nresolve = hammer2_vop_nresolve, 2616 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, 2617 .vop_nmkdir = hammer2_vop_nmkdir, 2618 .vop_nmknod = hammer2_vop_nmknod, 2619 .vop_ioctl = hammer2_vop_ioctl, 2620 .vop_mountctl = hammer2_vop_mountctl, 2621 .vop_bmap = hammer2_vop_bmap, 2622 .vop_strategy = hammer2_vop_strategy, 2623 .vop_kqfilter = hammer2_vop_kqfilter 2624 }; 2625 2626 struct vop_ops hammer2_spec_vops = { 2627 .vop_default = vop_defaultop, 2628 .vop_fsync = hammer2_vop_fsync, 2629 .vop_read = vop_stdnoread, 2630 .vop_write = vop_stdnowrite, 2631 .vop_access = hammer2_vop_access, 2632 .vop_close = hammer2_vop_close, 2633 .vop_markatime = hammer2_vop_markatime, 2634 .vop_getattr = hammer2_vop_getattr, 2635 .vop_inactive = hammer2_vop_inactive, 2636 .vop_reclaim = hammer2_vop_reclaim, 2637 .vop_setattr = hammer2_vop_setattr 2638 }; 2639 2640 struct vop_ops hammer2_fifo_vops = { 2641 .vop_default = fifo_vnoperate, 2642 .vop_fsync = hammer2_vop_fsync, 2643 #if 0 2644 .vop_read = hammer2_vop_fiforead, 2645 .vop_write = hammer2_vop_fifowrite, 2646 #endif 2647 .vop_access = hammer2_vop_access, 2648 #if 0 2649 .vop_close = hammer2_vop_fifoclose, 2650 #endif 2651 .vop_markatime = hammer2_vop_markatime, 2652 .vop_getattr = hammer2_vop_getattr, 2653 .vop_inactive = hammer2_vop_inactive, 2654 .vop_reclaim = hammer2_vop_reclaim, 2655 .vop_setattr = hammer2_vop_setattr, 2656 .vop_kqfilter = hammer2_vop_fifokqfilter 2657 }; 2658 2659