1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * Kernel Filesystem interface 38 * 39 * NOTE! local ipdata pointers must be reloaded on any modifying operation 40 * to the inode as its underlying chain may have changed. 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/buf.h> 48 #include <sys/proc.h> 49 #include <sys/namei.h> 50 #include <sys/mount.h> 51 #include <sys/vnode.h> 52 #include <sys/mountctl.h> 53 #include <sys/dirent.h> 54 #include <sys/uio.h> 55 #include <sys/objcache.h> 56 #include <sys/event.h> 57 #include <sys/file.h> 58 #include <vfs/fifofs/fifo.h> 59 60 #include "hammer2.h" 61 #include "hammer2_lz4.h" 62 63 #include "zlib/hammer2_zlib.h" 64 65 #define ZFOFFSET (-2LL) 66 67 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 68 int seqcount); 69 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 70 int ioflag, int seqcount); 71 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize); 72 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize); 73 74 struct objcache *cache_buffer_read; 75 struct objcache *cache_buffer_write; 76 77 /* 78 * Callback used in read path in case that a block is compressed with LZ4. 79 */ 80 static 81 void 82 hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio) 83 { 84 struct buf *bp; 85 char *compressed_buffer; 86 int compressed_size; 87 int result; 88 89 bp = bio->bio_buf; 90 91 #if 0 92 if bio->bio_caller_info2.index && 93 bio->bio_caller_info1.uvalue32 != 94 crc32(bp->b_data, bp->b_bufsize) --- return error 95 #endif 96 97 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE); 98 compressed_size = *(const int *)data; 99 KKASSERT(compressed_size <= bytes - sizeof(int)); 100 101 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); 102 result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]), 103 compressed_buffer, 104 compressed_size, 105 bp->b_bufsize); 106 if (result < 0) { 107 kprintf("READ PATH: Error during decompression." 108 "bio %016jx/%d\n", 109 (intmax_t)bio->bio_offset, bytes); 110 /* make sure it isn't random garbage */ 111 bzero(compressed_buffer, bp->b_bufsize); 112 } 113 KKASSERT(result <= bp->b_bufsize); 114 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); 115 if (result < bp->b_bufsize) 116 bzero(bp->b_data + result, bp->b_bufsize - result); 117 objcache_put(cache_buffer_read, compressed_buffer); 118 bp->b_resid = 0; 119 bp->b_flags |= B_AGE; 120 } 121 122 /* 123 * Callback used in read path in case that a block is compressed with ZLIB. 124 * It is almost identical to LZ4 callback, so in theory they can be unified, 125 * but we didn't want to make changes in bio structure for that. 126 */ 127 static 128 void 129 hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio) 130 { 131 struct buf *bp; 132 char *compressed_buffer; 133 z_stream strm_decompress; 134 int result; 135 int ret; 136 137 bp = bio->bio_buf; 138 139 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE); 140 strm_decompress.avail_in = 0; 141 strm_decompress.next_in = Z_NULL; 142 143 ret = inflateInit(&strm_decompress); 144 145 if (ret != Z_OK) 146 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n"); 147 148 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); 149 strm_decompress.next_in = __DECONST(char *, data); 150 151 /* XXX supply proper size, subset of device bp */ 152 strm_decompress.avail_in = bytes; 153 strm_decompress.next_out = compressed_buffer; 154 strm_decompress.avail_out = bp->b_bufsize; 155 156 ret = inflate(&strm_decompress, Z_FINISH); 157 if (ret != Z_STREAM_END) { 158 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n"); 159 bzero(compressed_buffer, bp->b_bufsize); 160 } 161 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); 162 result = bp->b_bufsize - strm_decompress.avail_out; 163 if (result < bp->b_bufsize) 164 bzero(bp->b_data + result, strm_decompress.avail_out); 165 objcache_put(cache_buffer_read, compressed_buffer); 166 ret = inflateEnd(&strm_decompress); 167 168 bp->b_resid = 0; 169 bp->b_flags |= B_AGE; 170 } 171 172 static __inline 173 void 174 hammer2_knote(struct vnode *vp, int flags) 175 { 176 if (flags) 177 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 178 } 179 180 /* 181 * Last reference to a vnode is going away but it is still cached. 182 */ 183 static 184 int 185 hammer2_vop_inactive(struct vop_inactive_args *ap) 186 { 187 hammer2_inode_t *ip; 188 hammer2_cluster_t *cluster; 189 struct vnode *vp; 190 191 LOCKSTART; 192 vp = ap->a_vp; 193 ip = VTOI(vp); 194 195 /* 196 * Degenerate case 197 */ 198 if (ip == NULL) { 199 vrecycle(vp); 200 LOCKSTOP; 201 return (0); 202 } 203 204 /* 205 * Detect updates to the embedded data which may be synchronized by 206 * the strategy code. Simply mark the inode modified so it gets 207 * picked up by our normal flush. 208 */ 209 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_NEVER | 210 HAMMER2_RESOLVE_RDONLY); 211 KKASSERT(cluster); 212 213 /* 214 * Check for deleted inodes and recycle immediately. 215 * 216 * WARNING: nvtruncbuf() can only be safely called without the inode 217 * lock held due to the way our write thread works. 218 */ 219 if (hammer2_cluster_isunlinked(cluster)) { 220 hammer2_key_t lbase; 221 int nblksize; 222 223 nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL); 224 hammer2_inode_unlock(ip, cluster); 225 nvtruncbuf(vp, 0, nblksize, 0, 0); 226 vrecycle(vp); 227 } else { 228 hammer2_inode_unlock(ip, cluster); 229 } 230 LOCKSTOP; 231 return (0); 232 } 233 234 /* 235 * Reclaim a vnode so that it can be reused; after the inode is 236 * disassociated, the filesystem must manage it alone. 237 */ 238 static 239 int 240 hammer2_vop_reclaim(struct vop_reclaim_args *ap) 241 { 242 hammer2_cluster_t *cluster; 243 hammer2_inode_t *ip; 244 hammer2_pfs_t *pmp; 245 struct vnode *vp; 246 247 LOCKSTART; 248 vp = ap->a_vp; 249 ip = VTOI(vp); 250 if (ip == NULL) { 251 LOCKSTOP; 252 return(0); 253 } 254 255 /* 256 * Inode must be locked for reclaim. 257 */ 258 pmp = ip->pmp; 259 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_NEVER | 260 HAMMER2_RESOLVE_RDONLY); 261 262 /* 263 * The final close of a deleted file or directory marks it for 264 * destruction. The DELETED flag allows the flusher to shortcut 265 * any modified blocks still unflushed (that is, just ignore them). 266 * 267 * HAMMER2 usually does not try to optimize the freemap by returning 268 * deleted blocks to it as it does not usually know how many snapshots 269 * might be referencing portions of the file/dir. 270 */ 271 vp->v_data = NULL; 272 ip->vp = NULL; 273 274 /* 275 * NOTE! We do not attempt to flush chains here, flushing is 276 * really fragile and could also deadlock. 277 */ 278 vclrisdirty(vp); 279 280 /* 281 * A reclaim can occur at any time so we cannot safely start a 282 * transaction to handle reclamation of unlinked files. Instead, 283 * the ip is left with a reference and placed on a linked list and 284 * handled later on. 285 */ 286 if (hammer2_cluster_isunlinked(cluster)) { 287 hammer2_inode_unlink_t *ipul; 288 289 ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO); 290 ipul->ip = ip; 291 292 hammer2_spin_ex(&pmp->list_spin); 293 TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry); 294 hammer2_spin_unex(&pmp->list_spin); 295 hammer2_inode_unlock(ip, cluster); /* unlock */ 296 /* retain ref from vp for ipul */ 297 } else { 298 hammer2_inode_unlock(ip, cluster); /* unlock */ 299 hammer2_inode_drop(ip); /* vp ref */ 300 } 301 /* cluster no longer referenced */ 302 /* cluster = NULL; not needed */ 303 304 /* 305 * XXX handle background sync when ip dirty, kernel will no longer 306 * notify us regarding this inode because there is no longer a 307 * vnode attached to it. 308 */ 309 310 LOCKSTOP; 311 return (0); 312 } 313 314 static 315 int 316 hammer2_vop_fsync(struct vop_fsync_args *ap) 317 { 318 hammer2_inode_t *ip; 319 hammer2_trans_t trans; 320 hammer2_cluster_t *cluster; 321 struct vnode *vp; 322 323 LOCKSTART; 324 vp = ap->a_vp; 325 ip = VTOI(vp); 326 327 #if 0 328 /* XXX can't do this yet */ 329 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH); 330 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 331 #endif 332 hammer2_trans_init(&trans, ip->pmp, 0); 333 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 334 335 /* 336 * Calling chain_flush here creates a lot of duplicative 337 * COW operations due to non-optimal vnode ordering. 338 * 339 * Only do it for an actual fsync() syscall. The other forms 340 * which call this function will eventually call chain_flush 341 * on the volume root as a catch-all, which is far more optimal. 342 */ 343 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS); 344 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 345 /*vclrisdirty(vp);*/ 346 if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME)) 347 hammer2_inode_fsync(&trans, ip, cluster); 348 349 hammer2_inode_unlock(ip, cluster); 350 hammer2_trans_done(&trans); 351 352 LOCKSTOP; 353 return (0); 354 } 355 356 static 357 int 358 hammer2_vop_access(struct vop_access_args *ap) 359 { 360 hammer2_inode_t *ip = VTOI(ap->a_vp); 361 const hammer2_inode_data_t *ripdata; 362 hammer2_cluster_t *cluster; 363 uid_t uid; 364 gid_t gid; 365 int error; 366 367 LOCKSTART; 368 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS | 369 HAMMER2_RESOLVE_SHARED); 370 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 371 uid = hammer2_to_unix_xid(&ripdata->uid); 372 gid = hammer2_to_unix_xid(&ripdata->gid); 373 error = vop_helper_access(ap, uid, gid, ripdata->mode, ripdata->uflags); 374 hammer2_inode_unlock(ip, cluster); 375 376 LOCKSTOP; 377 return (error); 378 } 379 380 static 381 int 382 hammer2_vop_getattr(struct vop_getattr_args *ap) 383 { 384 const hammer2_inode_data_t *ripdata; 385 hammer2_cluster_t *cluster; 386 hammer2_pfs_t *pmp; 387 hammer2_inode_t *ip; 388 hammer2_blockref_t bref; 389 struct vnode *vp; 390 struct vattr *vap; 391 392 LOCKSTART; 393 vp = ap->a_vp; 394 vap = ap->a_vap; 395 396 ip = VTOI(vp); 397 pmp = ip->pmp; 398 399 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS | 400 HAMMER2_RESOLVE_SHARED); 401 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 402 KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE); 403 hammer2_cluster_bref(cluster, &bref); 404 405 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 406 vap->va_fileid = ripdata->inum; 407 vap->va_mode = ripdata->mode; 408 vap->va_nlink = ripdata->nlinks; 409 vap->va_uid = hammer2_to_unix_xid(&ripdata->uid); 410 vap->va_gid = hammer2_to_unix_xid(&ripdata->gid); 411 vap->va_rmajor = 0; 412 vap->va_rminor = 0; 413 vap->va_size = ip->size; /* protected by shared lock */ 414 vap->va_blocksize = HAMMER2_PBUFSIZE; 415 vap->va_flags = ripdata->uflags; 416 hammer2_time_to_timespec(ripdata->ctime, &vap->va_ctime); 417 hammer2_time_to_timespec(ripdata->mtime, &vap->va_mtime); 418 hammer2_time_to_timespec(ripdata->mtime, &vap->va_atime); 419 vap->va_gen = 1; 420 vap->va_bytes = bref.data_count; 421 vap->va_type = hammer2_get_vtype(ripdata); 422 vap->va_filerev = 0; 423 vap->va_uid_uuid = ripdata->uid; 424 vap->va_gid_uuid = ripdata->gid; 425 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 426 VA_FSID_UUID_VALID; 427 428 hammer2_inode_unlock(ip, cluster); 429 430 LOCKSTOP; 431 return (0); 432 } 433 434 static 435 int 436 hammer2_vop_setattr(struct vop_setattr_args *ap) 437 { 438 const hammer2_inode_data_t *ripdata; 439 hammer2_inode_data_t *wipdata; 440 hammer2_inode_t *ip; 441 hammer2_cluster_t *cluster; 442 hammer2_trans_t trans; 443 struct vnode *vp; 444 struct vattr *vap; 445 int error; 446 int kflags = 0; 447 int domtime = 0; 448 int dosync = 0; 449 uint64_t ctime; 450 451 LOCKSTART; 452 vp = ap->a_vp; 453 vap = ap->a_vap; 454 hammer2_update_time(&ctime); 455 456 ip = VTOI(vp); 457 458 if (ip->pmp->ronly) { 459 LOCKSTOP; 460 return(EROFS); 461 } 462 463 hammer2_pfs_memory_wait(ip->pmp); 464 hammer2_trans_init(&trans, ip->pmp, 0); 465 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS); 466 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 467 error = 0; 468 469 if (vap->va_flags != VNOVAL) { 470 u_int32_t flags; 471 472 flags = ripdata->uflags; 473 error = vop_helper_setattr_flags(&flags, vap->va_flags, 474 hammer2_to_unix_xid(&ripdata->uid), 475 ap->a_cred); 476 if (error == 0) { 477 if (ripdata->uflags != flags) { 478 wipdata = hammer2_cluster_modify_ip(&trans, ip, 479 cluster, 0); 480 wipdata->uflags = flags; 481 wipdata->ctime = ctime; 482 kflags |= NOTE_ATTRIB; 483 dosync = 1; 484 ripdata = wipdata; 485 } 486 if (ripdata->uflags & (IMMUTABLE | APPEND)) { 487 error = 0; 488 goto done; 489 } 490 } 491 goto done; 492 } 493 if (ripdata->uflags & (IMMUTABLE | APPEND)) { 494 error = EPERM; 495 goto done; 496 } 497 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 498 mode_t cur_mode = ripdata->mode; 499 uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid); 500 gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid); 501 uuid_t uuid_uid; 502 uuid_t uuid_gid; 503 504 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 505 ap->a_cred, 506 &cur_uid, &cur_gid, &cur_mode); 507 if (error == 0) { 508 hammer2_guid_to_uuid(&uuid_uid, cur_uid); 509 hammer2_guid_to_uuid(&uuid_gid, cur_gid); 510 if (bcmp(&uuid_uid, &ripdata->uid, sizeof(uuid_uid)) || 511 bcmp(&uuid_gid, &ripdata->gid, sizeof(uuid_gid)) || 512 ripdata->mode != cur_mode 513 ) { 514 wipdata = hammer2_cluster_modify_ip(&trans, ip, 515 cluster, 0); 516 wipdata->uid = uuid_uid; 517 wipdata->gid = uuid_gid; 518 wipdata->mode = cur_mode; 519 wipdata->ctime = ctime; 520 dosync = 1; 521 ripdata = wipdata; 522 } 523 kflags |= NOTE_ATTRIB; 524 } 525 } 526 527 /* 528 * Resize the file 529 */ 530 if (vap->va_size != VNOVAL && ip->size != vap->va_size) { 531 switch(vp->v_type) { 532 case VREG: 533 if (vap->va_size == ip->size) 534 break; 535 hammer2_inode_unlock(ip, cluster); 536 if (vap->va_size < ip->size) { 537 hammer2_truncate_file(ip, vap->va_size); 538 } else { 539 hammer2_extend_file(ip, vap->va_size); 540 } 541 cluster = hammer2_inode_lock(ip, 542 HAMMER2_RESOLVE_ALWAYS); 543 /* RELOAD */ 544 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 545 domtime = 1; 546 break; 547 default: 548 error = EINVAL; 549 goto done; 550 } 551 } 552 #if 0 553 /* atime not supported */ 554 if (vap->va_atime.tv_sec != VNOVAL) { 555 wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0); 556 wipdata->atime = hammer2_timespec_to_time(&vap->va_atime); 557 kflags |= NOTE_ATTRIB; 558 dosync = 1; 559 ripdata = wipdata; 560 } 561 #endif 562 if (vap->va_mtime.tv_sec != VNOVAL) { 563 wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0); 564 wipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime); 565 kflags |= NOTE_ATTRIB; 566 domtime = 0; 567 dosync = 1; 568 ripdata = wipdata; 569 } 570 if (vap->va_mode != (mode_t)VNOVAL) { 571 mode_t cur_mode = ripdata->mode; 572 uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid); 573 gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid); 574 575 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 576 cur_uid, cur_gid, &cur_mode); 577 if (error == 0 && ripdata->mode != cur_mode) { 578 wipdata = hammer2_cluster_modify_ip(&trans, ip, 579 cluster, 0); 580 wipdata->mode = cur_mode; 581 wipdata->ctime = ctime; 582 kflags |= NOTE_ATTRIB; 583 dosync = 1; 584 ripdata = wipdata; 585 } 586 } 587 588 /* 589 * If a truncation occurred we must call inode_fsync() now in order 590 * to trim the related data chains, otherwise a later expansion can 591 * cause havoc. 592 */ 593 if (dosync) { 594 hammer2_cluster_modsync(cluster); 595 dosync = 0; 596 } 597 hammer2_inode_fsync(&trans, ip, cluster); 598 599 /* 600 * Cleanup. If domtime is set an additional inode modification 601 * must be flagged. All other modifications will have already 602 * set INODE_MODIFIED and called vsetisdirty(). 603 */ 604 done: 605 if (domtime) { 606 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED | 607 HAMMER2_INODE_MTIME); 608 vsetisdirty(ip->vp); 609 } 610 if (dosync) 611 hammer2_cluster_modsync(cluster); 612 hammer2_inode_unlock(ip, cluster); 613 hammer2_trans_done(&trans); 614 hammer2_knote(ip->vp, kflags); 615 616 LOCKSTOP; 617 return (error); 618 } 619 620 static 621 int 622 hammer2_vop_readdir(struct vop_readdir_args *ap) 623 { 624 const hammer2_inode_data_t *ripdata; 625 hammer2_inode_t *ip; 626 hammer2_inode_t *xip; 627 hammer2_cluster_t *cparent; 628 hammer2_cluster_t *cluster; 629 hammer2_cluster_t *xcluster; 630 hammer2_blockref_t bref; 631 hammer2_tid_t inum; 632 hammer2_key_t key_next; 633 hammer2_key_t lkey; 634 struct uio *uio; 635 off_t *cookies; 636 off_t saveoff; 637 int cookie_index; 638 int ncookies; 639 int error; 640 int dtype; 641 int r; 642 643 LOCKSTART; 644 ip = VTOI(ap->a_vp); 645 uio = ap->a_uio; 646 saveoff = uio->uio_offset; 647 648 /* 649 * Setup cookies directory entry cookies if requested 650 */ 651 if (ap->a_ncookies) { 652 ncookies = uio->uio_resid / 16 + 1; 653 if (ncookies > 1024) 654 ncookies = 1024; 655 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 656 } else { 657 ncookies = -1; 658 cookies = NULL; 659 } 660 cookie_index = 0; 661 662 cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS | 663 HAMMER2_RESOLVE_SHARED); 664 665 ripdata = &hammer2_cluster_rdata(cparent)->ipdata; 666 667 /* 668 * Handle artificial entries. To ensure that only positive 64 bit 669 * quantities are returned to userland we always strip off bit 63. 670 * The hash code is designed such that codes 0x0000-0x7FFF are not 671 * used, allowing us to use these codes for articial entries. 672 * 673 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not 674 * allow '..' to cross the mount point into (e.g.) the super-root. 675 */ 676 error = 0; 677 cluster = (void *)(intptr_t)-1; /* non-NULL for early goto done case */ 678 679 if (saveoff == 0) { 680 inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK; 681 r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, "."); 682 if (r) 683 goto done; 684 if (cookies) 685 cookies[cookie_index] = saveoff; 686 ++saveoff; 687 ++cookie_index; 688 if (cookie_index == ncookies) 689 goto done; 690 } 691 692 if (saveoff == 1) { 693 /* 694 * Be careful with lockorder when accessing ".." 695 * 696 * (ip is the current dir. xip is the parent dir). 697 */ 698 inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK; 699 while (ip->pip != NULL && ip != ip->pmp->iroot) { 700 xip = ip->pip; 701 hammer2_inode_ref(xip); 702 hammer2_inode_unlock(ip, cparent); 703 xcluster = hammer2_inode_lock(xip, 704 HAMMER2_RESOLVE_ALWAYS | 705 HAMMER2_RESOLVE_SHARED); 706 707 cparent = hammer2_inode_lock(ip, 708 HAMMER2_RESOLVE_ALWAYS | 709 HAMMER2_RESOLVE_SHARED); 710 hammer2_inode_drop(xip); 711 ripdata = &hammer2_cluster_rdata(cparent)->ipdata; 712 if (xip == ip->pip) { 713 inum = hammer2_cluster_rdata(xcluster)-> 714 ipdata.inum & HAMMER2_DIRHASH_USERMSK; 715 hammer2_inode_unlock(xip, xcluster); 716 break; 717 } 718 hammer2_inode_unlock(xip, xcluster); 719 } 720 r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, ".."); 721 if (r) 722 goto done; 723 if (cookies) 724 cookies[cookie_index] = saveoff; 725 ++saveoff; 726 ++cookie_index; 727 if (cookie_index == ncookies) 728 goto done; 729 } 730 731 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE; 732 if (hammer2_debug & 0x0020) 733 kprintf("readdir: lkey %016jx\n", lkey); 734 735 /* 736 * parent is the inode cluster, already locked for us. Don't 737 * double lock shared locks as this will screw up upgrades. 738 */ 739 if (error) { 740 goto done; 741 } 742 cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey, 743 HAMMER2_LOOKUP_SHARED); 744 if (cluster == NULL) { 745 cluster = hammer2_cluster_lookup(cparent, &key_next, 746 lkey, (hammer2_key_t)-1, 747 HAMMER2_LOOKUP_SHARED); 748 } 749 if (cluster) 750 hammer2_cluster_bref(cluster, &bref); 751 while (cluster) { 752 if (hammer2_debug & 0x0020) 753 kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n", 754 cparent->focus, cluster->focus, 755 bref.key, key_next); 756 757 if (bref.type == HAMMER2_BREF_TYPE_INODE) { 758 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 759 dtype = hammer2_get_dtype(ripdata); 760 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK; 761 r = vop_write_dirent(&error, uio, 762 ripdata->inum & 763 HAMMER2_DIRHASH_USERMSK, 764 dtype, 765 ripdata->name_len, 766 ripdata->filename); 767 if (r) 768 break; 769 if (cookies) 770 cookies[cookie_index] = saveoff; 771 ++cookie_index; 772 } else { 773 /* XXX chain error */ 774 kprintf("bad chain type readdir %d\n", bref.type); 775 } 776 777 /* 778 * Keys may not be returned in order so once we have a 779 * placemarker (cluster) the scan must allow the full range 780 * or some entries will be missed. 781 */ 782 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 783 key_next, (hammer2_key_t)-1, 784 HAMMER2_LOOKUP_SHARED); 785 if (cluster) { 786 hammer2_cluster_bref(cluster, &bref); 787 saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1; 788 } else { 789 saveoff = (hammer2_key_t)-1; 790 } 791 if (cookie_index == ncookies) 792 break; 793 } 794 if (cluster) { 795 hammer2_cluster_unlock(cluster); 796 hammer2_cluster_drop(cluster); 797 } 798 done: 799 hammer2_inode_unlock(ip, cparent); 800 if (ap->a_eofflag) 801 *ap->a_eofflag = (cluster == NULL); 802 if (hammer2_debug & 0x0020) 803 kprintf("readdir: done at %016jx\n", saveoff); 804 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 805 if (error && cookie_index == 0) { 806 if (cookies) { 807 kfree(cookies, M_TEMP); 808 *ap->a_ncookies = 0; 809 *ap->a_cookies = NULL; 810 } 811 } else { 812 if (cookies) { 813 *ap->a_ncookies = cookie_index; 814 *ap->a_cookies = cookies; 815 } 816 } 817 LOCKSTOP; 818 return (error); 819 } 820 821 /* 822 * hammer2_vop_readlink { vp, uio, cred } 823 */ 824 static 825 int 826 hammer2_vop_readlink(struct vop_readlink_args *ap) 827 { 828 struct vnode *vp; 829 hammer2_inode_t *ip; 830 int error; 831 832 vp = ap->a_vp; 833 if (vp->v_type != VLNK) 834 return (EINVAL); 835 ip = VTOI(vp); 836 837 error = hammer2_read_file(ip, ap->a_uio, 0); 838 return (error); 839 } 840 841 static 842 int 843 hammer2_vop_read(struct vop_read_args *ap) 844 { 845 struct vnode *vp; 846 hammer2_inode_t *ip; 847 struct uio *uio; 848 int error; 849 int seqcount; 850 int bigread; 851 852 /* 853 * Read operations supported on this vnode? 854 */ 855 vp = ap->a_vp; 856 if (vp->v_type != VREG) 857 return (EINVAL); 858 859 /* 860 * Misc 861 */ 862 ip = VTOI(vp); 863 uio = ap->a_uio; 864 error = 0; 865 866 seqcount = ap->a_ioflag >> 16; 867 bigread = (uio->uio_resid > 100 * 1024 * 1024); 868 869 error = hammer2_read_file(ip, uio, seqcount); 870 return (error); 871 } 872 873 static 874 int 875 hammer2_vop_write(struct vop_write_args *ap) 876 { 877 hammer2_inode_t *ip; 878 hammer2_trans_t trans; 879 thread_t td; 880 struct vnode *vp; 881 struct uio *uio; 882 int error; 883 int seqcount; 884 int bigwrite; 885 886 /* 887 * Read operations supported on this vnode? 888 */ 889 vp = ap->a_vp; 890 if (vp->v_type != VREG) 891 return (EINVAL); 892 893 /* 894 * Misc 895 */ 896 ip = VTOI(vp); 897 uio = ap->a_uio; 898 error = 0; 899 if (ip->pmp->ronly) { 900 return (EROFS); 901 } 902 903 seqcount = ap->a_ioflag >> 16; 904 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 905 906 /* 907 * Check resource limit 908 */ 909 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 910 uio->uio_offset + uio->uio_resid > 911 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 912 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 913 return (EFBIG); 914 } 915 916 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 917 918 /* 919 * The transaction interlocks against flushes initiations 920 * (note: but will run concurrently with the actual flush). 921 */ 922 hammer2_trans_init(&trans, ip->pmp, 0); 923 error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount); 924 hammer2_trans_done(&trans); 925 926 return (error); 927 } 928 929 /* 930 * Perform read operations on a file or symlink given an UNLOCKED 931 * inode and uio. 932 * 933 * The passed ip is not locked. 934 */ 935 static 936 int 937 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 938 { 939 hammer2_off_t size; 940 struct buf *bp; 941 int error; 942 943 error = 0; 944 945 /* 946 * UIO read loop. 947 * 948 * WARNING! Assumes that the kernel interlocks size changes at the 949 * vnode level. 950 */ 951 hammer2_mtx_sh(&ip->lock); 952 size = ip->size; 953 hammer2_mtx_unlock(&ip->lock); 954 955 while (uio->uio_resid > 0 && uio->uio_offset < size) { 956 hammer2_key_t lbase; 957 hammer2_key_t leof; 958 int lblksize; 959 int loff; 960 int n; 961 962 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 963 &lbase, &leof); 964 965 error = cluster_read(ip->vp, leof, lbase, lblksize, 966 uio->uio_resid, seqcount * BKVASIZE, 967 &bp); 968 969 if (error) 970 break; 971 loff = (int)(uio->uio_offset - lbase); 972 n = lblksize - loff; 973 if (n > uio->uio_resid) 974 n = uio->uio_resid; 975 if (n > size - uio->uio_offset) 976 n = (int)(size - uio->uio_offset); 977 bp->b_flags |= B_AGE; 978 uiomove((char *)bp->b_data + loff, n, uio); 979 bqrelse(bp); 980 } 981 return (error); 982 } 983 984 /* 985 * Write to the file represented by the inode via the logical buffer cache. 986 * The inode may represent a regular file or a symlink. 987 * 988 * The inode must not be locked. 989 */ 990 static 991 int 992 hammer2_write_file(hammer2_inode_t *ip, 993 struct uio *uio, int ioflag, int seqcount) 994 { 995 hammer2_key_t old_eof; 996 hammer2_key_t new_eof; 997 struct buf *bp; 998 int kflags; 999 int error; 1000 int modified; 1001 1002 /* 1003 * Setup if append 1004 * 1005 * WARNING! Assumes that the kernel interlocks size changes at the 1006 * vnode level. 1007 */ 1008 hammer2_mtx_ex(&ip->lock); 1009 if (ioflag & IO_APPEND) 1010 uio->uio_offset = ip->size; 1011 old_eof = ip->size; 1012 hammer2_mtx_unlock(&ip->lock); 1013 1014 /* 1015 * Extend the file if necessary. If the write fails at some point 1016 * we will truncate it back down to cover as much as we were able 1017 * to write. 1018 * 1019 * Doing this now makes it easier to calculate buffer sizes in 1020 * the loop. 1021 */ 1022 kflags = 0; 1023 error = 0; 1024 modified = 0; 1025 1026 if (uio->uio_offset + uio->uio_resid > old_eof) { 1027 new_eof = uio->uio_offset + uio->uio_resid; 1028 modified = 1; 1029 hammer2_extend_file(ip, new_eof); 1030 kflags |= NOTE_EXTEND; 1031 } else { 1032 new_eof = old_eof; 1033 } 1034 1035 /* 1036 * UIO write loop 1037 */ 1038 while (uio->uio_resid > 0) { 1039 hammer2_key_t lbase; 1040 int trivial; 1041 int endofblk; 1042 int lblksize; 1043 int loff; 1044 int n; 1045 1046 /* 1047 * Don't allow the buffer build to blow out the buffer 1048 * cache. 1049 */ 1050 if ((ioflag & IO_RECURSE) == 0) 1051 bwillwrite(HAMMER2_PBUFSIZE); 1052 1053 /* 1054 * This nominally tells us how much we can cluster and 1055 * what the logical buffer size needs to be. Currently 1056 * we don't try to cluster the write and just handle one 1057 * block at a time. 1058 */ 1059 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 1060 &lbase, NULL); 1061 loff = (int)(uio->uio_offset - lbase); 1062 1063 KKASSERT(lblksize <= 65536); 1064 1065 /* 1066 * Calculate bytes to copy this transfer and whether the 1067 * copy completely covers the buffer or not. 1068 */ 1069 trivial = 0; 1070 n = lblksize - loff; 1071 if (n > uio->uio_resid) { 1072 n = uio->uio_resid; 1073 if (loff == lbase && uio->uio_offset + n == new_eof) 1074 trivial = 1; 1075 endofblk = 0; 1076 } else { 1077 if (loff == 0) 1078 trivial = 1; 1079 endofblk = 1; 1080 } 1081 1082 /* 1083 * Get the buffer 1084 */ 1085 if (uio->uio_segflg == UIO_NOCOPY) { 1086 /* 1087 * Issuing a write with the same data backing the 1088 * buffer. Instantiate the buffer to collect the 1089 * backing vm pages, then read-in any missing bits. 1090 * 1091 * This case is used by vop_stdputpages(). 1092 */ 1093 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 1094 if ((bp->b_flags & B_CACHE) == 0) { 1095 bqrelse(bp); 1096 error = bread(ip->vp, lbase, lblksize, &bp); 1097 } 1098 } else if (trivial) { 1099 /* 1100 * Even though we are entirely overwriting the buffer 1101 * we may still have to zero it out to avoid a 1102 * mmap/write visibility issue. 1103 */ 1104 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 1105 if ((bp->b_flags & B_CACHE) == 0) 1106 vfs_bio_clrbuf(bp); 1107 } else { 1108 /* 1109 * Partial overwrite, read in any missing bits then 1110 * replace the portion being written. 1111 * 1112 * (The strategy code will detect zero-fill physical 1113 * blocks for this case). 1114 */ 1115 error = bread(ip->vp, lbase, lblksize, &bp); 1116 if (error == 0) 1117 bheavy(bp); 1118 } 1119 1120 if (error) { 1121 brelse(bp); 1122 break; 1123 } 1124 1125 /* 1126 * Ok, copy the data in 1127 */ 1128 error = uiomove(bp->b_data + loff, n, uio); 1129 kflags |= NOTE_WRITE; 1130 modified = 1; 1131 if (error) { 1132 brelse(bp); 1133 break; 1134 } 1135 1136 /* 1137 * WARNING: Pageout daemon will issue UIO_NOCOPY writes 1138 * with IO_SYNC or IO_ASYNC set. These writes 1139 * must be handled as the pageout daemon expects. 1140 */ 1141 if (ioflag & IO_SYNC) { 1142 bwrite(bp); 1143 } else if ((ioflag & IO_DIRECT) && endofblk) { 1144 bawrite(bp); 1145 } else if (ioflag & IO_ASYNC) { 1146 bawrite(bp); 1147 } else { 1148 bdwrite(bp); 1149 } 1150 } 1151 1152 /* 1153 * Cleanup. If we extended the file EOF but failed to write through 1154 * the entire write is a failure and we have to back-up. 1155 */ 1156 if (error && new_eof != old_eof) { 1157 hammer2_truncate_file(ip, old_eof); 1158 } else if (modified) { 1159 hammer2_mtx_ex(&ip->lock); 1160 hammer2_update_time(&ip->mtime); 1161 atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME); 1162 hammer2_mtx_unlock(&ip->lock); 1163 } 1164 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 1165 hammer2_knote(ip->vp, kflags); 1166 vsetisdirty(ip->vp); 1167 hammer2_trans_assert_strategy(ip->pmp); 1168 1169 return error; 1170 } 1171 1172 /* 1173 * Truncate the size of a file. The inode must not be locked. 1174 * 1175 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED 1176 * 1177 * WARNING: nvtruncbuf() can only be safely called without the inode lock 1178 * held due to the way our write thread works. 1179 * 1180 * WARNING! Assumes that the kernel interlocks size changes at the 1181 * vnode level. 1182 */ 1183 static 1184 void 1185 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1186 { 1187 hammer2_key_t lbase; 1188 int nblksize; 1189 1190 LOCKSTART; 1191 if (ip->vp) { 1192 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1193 nvtruncbuf(ip->vp, nsize, 1194 nblksize, (int)nsize & (nblksize - 1), 1195 0); 1196 } 1197 hammer2_mtx_ex(&ip->lock); 1198 ip->size = nsize; 1199 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1200 hammer2_mtx_unlock(&ip->lock); 1201 LOCKSTOP; 1202 } 1203 1204 /* 1205 * Extend the size of a file. The inode must not be locked. 1206 * 1207 * WARNING! Assumes that the kernel interlocks size changes at the 1208 * vnode level. 1209 * 1210 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED 1211 */ 1212 static 1213 void 1214 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1215 { 1216 hammer2_key_t lbase; 1217 hammer2_key_t osize; 1218 int oblksize; 1219 int nblksize; 1220 1221 LOCKSTART; 1222 hammer2_mtx_ex(&ip->lock); 1223 osize = ip->size; 1224 ip->size = nsize; 1225 hammer2_mtx_unlock(&ip->lock); 1226 1227 if (ip->vp) { 1228 oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL); 1229 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL); 1230 nvextendbuf(ip->vp, 1231 osize, nsize, 1232 oblksize, nblksize, 1233 -1, -1, 0); 1234 } 1235 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED); 1236 LOCKSTOP; 1237 } 1238 1239 static 1240 int 1241 hammer2_vop_nresolve(struct vop_nresolve_args *ap) 1242 { 1243 hammer2_inode_t *ip; 1244 hammer2_inode_t *dip; 1245 hammer2_cluster_t *cparent; 1246 hammer2_cluster_t *cluster; 1247 const hammer2_inode_data_t *ripdata; 1248 hammer2_key_t key_next; 1249 hammer2_key_t lhc; 1250 struct namecache *ncp; 1251 const uint8_t *name; 1252 size_t name_len; 1253 int error = 0; 1254 struct vnode *vp; 1255 1256 LOCKSTART; 1257 dip = VTOI(ap->a_dvp); 1258 ncp = ap->a_nch->ncp; 1259 name = ncp->nc_name; 1260 name_len = ncp->nc_nlen; 1261 lhc = hammer2_dirhash(name, name_len); 1262 1263 /* 1264 * Note: In DragonFly the kernel handles '.' and '..'. 1265 */ 1266 cparent = hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS | 1267 HAMMER2_RESOLVE_SHARED); 1268 1269 cluster = hammer2_cluster_lookup(cparent, &key_next, 1270 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1271 HAMMER2_LOOKUP_SHARED); 1272 while (cluster) { 1273 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { 1274 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1275 if (ripdata->name_len == name_len && 1276 bcmp(ripdata->filename, name, name_len) == 0) { 1277 break; 1278 } 1279 } 1280 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 1281 key_next, 1282 lhc + HAMMER2_DIRHASH_LOMASK, 1283 HAMMER2_LOOKUP_SHARED); 1284 } 1285 hammer2_inode_unlock(dip, cparent); 1286 1287 /* 1288 * Resolve hardlink entries before acquiring the inode. 1289 */ 1290 if (cluster) { 1291 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1292 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) { 1293 hammer2_tid_t inum = ripdata->inum; 1294 error = hammer2_hardlink_find(dip, NULL, &cluster); 1295 if (error) { 1296 kprintf("hammer2: unable to find hardlink " 1297 "0x%016jx\n", inum); 1298 LOCKSTOP; 1299 1300 return error; 1301 } 1302 } 1303 } 1304 1305 /* 1306 * nresolve needs to resolve hardlinks, the original cluster is not 1307 * sufficient. 1308 */ 1309 if (cluster) { 1310 ip = hammer2_inode_get(dip->pmp, dip, cluster); 1311 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1312 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) { 1313 kprintf("nresolve: fixup hardlink\n"); 1314 hammer2_inode_ref(ip); 1315 hammer2_inode_unlock(ip, NULL); 1316 hammer2_cluster_unlock(cluster); 1317 hammer2_cluster_drop(cluster); 1318 cluster = hammer2_inode_lock(ip, 1319 HAMMER2_RESOLVE_ALWAYS); 1320 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 1321 hammer2_inode_drop(ip); 1322 kprintf("nresolve: fixup to type %02x\n", 1323 ripdata->type); 1324 } 1325 } else { 1326 ip = NULL; 1327 } 1328 1329 #if 0 1330 /* 1331 * Deconsolidate any hardlink whos nlinks == 1. Ignore errors. 1332 * If an error occurs chain and ip are left alone. 1333 * 1334 * XXX upgrade shared lock? 1335 */ 1336 if (ochain && chain && 1337 chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) { 1338 kprintf("hammer2: need to unconsolidate hardlink for %s\n", 1339 chain->data->ipdata.filename); 1340 /* XXX retain shared lock on dip? (currently not held) */ 1341 hammer2_trans_init(&trans, dip->pmp, 0); 1342 hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain); 1343 hammer2_trans_done(&trans); 1344 } 1345 #endif 1346 1347 /* 1348 * Acquire the related vnode 1349 * 1350 * NOTE: For error processing, only ENOENT resolves the namecache 1351 * entry to NULL, otherwise we just return the error and 1352 * leave the namecache unresolved. 1353 * 1354 * NOTE: multiple hammer2_inode structures can be aliased to the 1355 * same chain element, for example for hardlinks. This 1356 * use case does not 'reattach' inode associations that 1357 * might already exist, but always allocates a new one. 1358 * 1359 * WARNING: inode structure is locked exclusively via inode_get 1360 * but chain was locked shared. inode_unlock() 1361 * will handle it properly. 1362 */ 1363 if (cluster) { 1364 vp = hammer2_igetv(ip, cluster, &error); 1365 if (error == 0) { 1366 vn_unlock(vp); 1367 cache_setvp(ap->a_nch, vp); 1368 } else if (error == ENOENT) { 1369 cache_setvp(ap->a_nch, NULL); 1370 } 1371 hammer2_inode_unlock(ip, cluster); 1372 1373 /* 1374 * The vp should not be released until after we've disposed 1375 * of our locks, because it might cause vop_inactive() to 1376 * be called. 1377 */ 1378 if (vp) 1379 vrele(vp); 1380 } else { 1381 error = ENOENT; 1382 cache_setvp(ap->a_nch, NULL); 1383 } 1384 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1385 ("resolve error %d/%p ap %p\n", 1386 error, ap->a_nch->ncp->nc_vp, ap)); 1387 LOCKSTOP; 1388 return error; 1389 } 1390 1391 static 1392 int 1393 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1394 { 1395 hammer2_inode_t *dip; 1396 hammer2_inode_t *ip; 1397 hammer2_cluster_t *cparent; 1398 int error; 1399 1400 LOCKSTART; 1401 dip = VTOI(ap->a_dvp); 1402 1403 if ((ip = dip->pip) == NULL) { 1404 *ap->a_vpp = NULL; 1405 LOCKSTOP; 1406 return ENOENT; 1407 } 1408 cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS); 1409 *ap->a_vpp = hammer2_igetv(ip, cparent, &error); 1410 hammer2_inode_unlock(ip, cparent); 1411 1412 LOCKSTOP; 1413 return error; 1414 } 1415 1416 static 1417 int 1418 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1419 { 1420 hammer2_inode_t *dip; 1421 hammer2_inode_t *nip; 1422 hammer2_trans_t trans; 1423 hammer2_cluster_t *cluster; 1424 struct namecache *ncp; 1425 const uint8_t *name; 1426 size_t name_len; 1427 int error; 1428 1429 LOCKSTART; 1430 dip = VTOI(ap->a_dvp); 1431 if (dip->pmp->ronly) { 1432 LOCKSTOP; 1433 return (EROFS); 1434 } 1435 1436 ncp = ap->a_nch->ncp; 1437 name = ncp->nc_name; 1438 name_len = ncp->nc_nlen; 1439 cluster = NULL; 1440 1441 hammer2_pfs_memory_wait(dip->pmp); 1442 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1443 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1444 name, name_len, 1445 &cluster, 0, &error); 1446 if (error) { 1447 KKASSERT(nip == NULL); 1448 *ap->a_vpp = NULL; 1449 } else { 1450 *ap->a_vpp = hammer2_igetv(nip, cluster, &error); 1451 hammer2_inode_unlock(nip, cluster); 1452 } 1453 hammer2_trans_done(&trans); 1454 1455 if (error == 0) { 1456 cache_setunresolved(ap->a_nch); 1457 cache_setvp(ap->a_nch, *ap->a_vpp); 1458 } 1459 LOCKSTOP; 1460 return error; 1461 } 1462 1463 /* 1464 * Return the largest contiguous physical disk range for the logical 1465 * request, in bytes. 1466 * 1467 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1468 * 1469 * Basically disabled, the logical buffer write thread has to deal with 1470 * buffers one-at-a-time. 1471 */ 1472 static 1473 int 1474 hammer2_vop_bmap(struct vop_bmap_args *ap) 1475 { 1476 *ap->a_doffsetp = NOOFFSET; 1477 if (ap->a_runp) 1478 *ap->a_runp = 0; 1479 if (ap->a_runb) 1480 *ap->a_runb = 0; 1481 return (EOPNOTSUPP); 1482 } 1483 1484 static 1485 int 1486 hammer2_vop_open(struct vop_open_args *ap) 1487 { 1488 return vop_stdopen(ap); 1489 } 1490 1491 /* 1492 * hammer2_vop_advlock { vp, id, op, fl, flags } 1493 */ 1494 static 1495 int 1496 hammer2_vop_advlock(struct vop_advlock_args *ap) 1497 { 1498 hammer2_inode_t *ip = VTOI(ap->a_vp); 1499 const hammer2_inode_data_t *ripdata; 1500 hammer2_cluster_t *cparent; 1501 hammer2_off_t size; 1502 1503 cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS | 1504 HAMMER2_RESOLVE_SHARED); 1505 ripdata = &hammer2_cluster_rdata(cparent)->ipdata; 1506 size = ripdata->size; 1507 hammer2_inode_unlock(ip, cparent); 1508 return (lf_advlock(ap, &ip->advlock, size)); 1509 } 1510 1511 1512 static 1513 int 1514 hammer2_vop_close(struct vop_close_args *ap) 1515 { 1516 return vop_stdclose(ap); 1517 } 1518 1519 /* 1520 * hammer2_vop_nlink { nch, dvp, vp, cred } 1521 * 1522 * Create a hardlink from (vp) to {dvp, nch}. 1523 */ 1524 static 1525 int 1526 hammer2_vop_nlink(struct vop_nlink_args *ap) 1527 { 1528 hammer2_inode_t *fdip; /* target directory to create link in */ 1529 hammer2_inode_t *tdip; /* target directory to create link in */ 1530 hammer2_inode_t *cdip; /* common parent directory */ 1531 hammer2_inode_t *ip; /* inode we are hardlinking to */ 1532 hammer2_cluster_t *cluster; 1533 hammer2_cluster_t *fdcluster; 1534 hammer2_cluster_t *tdcluster; 1535 hammer2_cluster_t *cdcluster; 1536 hammer2_trans_t trans; 1537 struct namecache *ncp; 1538 const uint8_t *name; 1539 size_t name_len; 1540 int error; 1541 1542 LOCKSTART; 1543 tdip = VTOI(ap->a_dvp); 1544 if (tdip->pmp->ronly) { 1545 LOCKSTOP; 1546 return (EROFS); 1547 } 1548 1549 ncp = ap->a_nch->ncp; 1550 name = ncp->nc_name; 1551 name_len = ncp->nc_nlen; 1552 1553 /* 1554 * ip represents the file being hardlinked. The file could be a 1555 * normal file or a hardlink target if it has already been hardlinked. 1556 * If ip is a hardlinked target then ip->pip represents the location 1557 * of the hardlinked target, NOT the location of the hardlink pointer. 1558 * 1559 * Bump nlinks and potentially also create or move the hardlink 1560 * target in the parent directory common to (ip) and (tdip). The 1561 * consolidation code can modify ip->cluster and ip->pip. The 1562 * returned cluster is locked. 1563 */ 1564 ip = VTOI(ap->a_vp); 1565 hammer2_pfs_memory_wait(ip->pmp); 1566 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_NEWINODE); 1567 1568 /* 1569 * The common parent directory must be locked first to avoid deadlocks. 1570 * Also note that fdip and/or tdip might match cdip. 1571 */ 1572 fdip = ip->pip; 1573 cdip = hammer2_inode_common_parent(fdip, tdip); 1574 cdcluster = hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS); 1575 fdcluster = hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS); 1576 tdcluster = hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS); 1577 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS); 1578 error = hammer2_hardlink_consolidate(&trans, ip, &cluster, 1579 cdip, cdcluster, 1); 1580 if (error) 1581 goto done; 1582 1583 /* 1584 * Create a directory entry connected to the specified cluster. 1585 * 1586 * WARNING! chain can get moved by the connect (indirectly due to 1587 * potential indirect block creation). 1588 */ 1589 error = hammer2_inode_connect(&trans, &cluster, 1, 1590 tdip, tdcluster, 1591 name, name_len, 0); 1592 if (error == 0) { 1593 cache_setunresolved(ap->a_nch); 1594 cache_setvp(ap->a_nch, ap->a_vp); 1595 } 1596 done: 1597 hammer2_inode_unlock(ip, cluster); 1598 hammer2_inode_unlock(tdip, tdcluster); 1599 hammer2_inode_unlock(fdip, fdcluster); 1600 hammer2_inode_unlock(cdip, cdcluster); 1601 hammer2_inode_drop(cdip); 1602 hammer2_trans_done(&trans); 1603 1604 LOCKSTOP; 1605 return error; 1606 } 1607 1608 /* 1609 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap } 1610 * 1611 * The operating system has already ensured that the directory entry 1612 * does not exist and done all appropriate namespace locking. 1613 */ 1614 static 1615 int 1616 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1617 { 1618 hammer2_inode_t *dip; 1619 hammer2_inode_t *nip; 1620 hammer2_trans_t trans; 1621 hammer2_cluster_t *ncluster; 1622 struct namecache *ncp; 1623 const uint8_t *name; 1624 size_t name_len; 1625 int error; 1626 1627 LOCKSTART; 1628 dip = VTOI(ap->a_dvp); 1629 if (dip->pmp->ronly) { 1630 LOCKSTOP; 1631 return (EROFS); 1632 } 1633 1634 ncp = ap->a_nch->ncp; 1635 name = ncp->nc_name; 1636 name_len = ncp->nc_nlen; 1637 hammer2_pfs_memory_wait(dip->pmp); 1638 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1639 ncluster = NULL; 1640 1641 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1642 name, name_len, 1643 &ncluster, 0, &error); 1644 if (error) { 1645 KKASSERT(nip == NULL); 1646 *ap->a_vpp = NULL; 1647 } else { 1648 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error); 1649 hammer2_inode_unlock(nip, ncluster); 1650 } 1651 hammer2_trans_done(&trans); 1652 1653 if (error == 0) { 1654 cache_setunresolved(ap->a_nch); 1655 cache_setvp(ap->a_nch, *ap->a_vpp); 1656 } 1657 LOCKSTOP; 1658 return error; 1659 } 1660 1661 /* 1662 * Make a device node (typically a fifo) 1663 */ 1664 static 1665 int 1666 hammer2_vop_nmknod(struct vop_nmknod_args *ap) 1667 { 1668 hammer2_inode_t *dip; 1669 hammer2_inode_t *nip; 1670 hammer2_trans_t trans; 1671 hammer2_cluster_t *ncluster; 1672 struct namecache *ncp; 1673 const uint8_t *name; 1674 size_t name_len; 1675 int error; 1676 1677 LOCKSTART; 1678 dip = VTOI(ap->a_dvp); 1679 if (dip->pmp->ronly) { 1680 LOCKSTOP; 1681 return (EROFS); 1682 } 1683 1684 ncp = ap->a_nch->ncp; 1685 name = ncp->nc_name; 1686 name_len = ncp->nc_nlen; 1687 hammer2_pfs_memory_wait(dip->pmp); 1688 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1689 ncluster = NULL; 1690 1691 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1692 name, name_len, 1693 &ncluster, 0, &error); 1694 if (error) { 1695 KKASSERT(nip == NULL); 1696 *ap->a_vpp = NULL; 1697 } else { 1698 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error); 1699 hammer2_inode_unlock(nip, ncluster); 1700 } 1701 hammer2_trans_done(&trans); 1702 1703 if (error == 0) { 1704 cache_setunresolved(ap->a_nch); 1705 cache_setvp(ap->a_nch, *ap->a_vpp); 1706 } 1707 LOCKSTOP; 1708 return error; 1709 } 1710 1711 /* 1712 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1713 */ 1714 static 1715 int 1716 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1717 { 1718 hammer2_inode_t *dip; 1719 hammer2_inode_t *nip; 1720 hammer2_cluster_t *ncparent; 1721 hammer2_trans_t trans; 1722 struct namecache *ncp; 1723 const uint8_t *name; 1724 size_t name_len; 1725 int error; 1726 1727 dip = VTOI(ap->a_dvp); 1728 if (dip->pmp->ronly) 1729 return (EROFS); 1730 1731 ncp = ap->a_nch->ncp; 1732 name = ncp->nc_name; 1733 name_len = ncp->nc_nlen; 1734 hammer2_pfs_memory_wait(dip->pmp); 1735 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE); 1736 ncparent = NULL; 1737 1738 ap->a_vap->va_type = VLNK; /* enforce type */ 1739 1740 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1741 name, name_len, 1742 &ncparent, 0, &error); 1743 if (error) { 1744 KKASSERT(nip == NULL); 1745 *ap->a_vpp = NULL; 1746 hammer2_trans_done(&trans); 1747 return error; 1748 } 1749 *ap->a_vpp = hammer2_igetv(nip, ncparent, &error); 1750 1751 /* 1752 * Build the softlink (~like file data) and finalize the namecache. 1753 */ 1754 if (error == 0) { 1755 size_t bytes; 1756 struct uio auio; 1757 struct iovec aiov; 1758 hammer2_inode_data_t *nipdata; 1759 1760 nipdata = &hammer2_cluster_wdata(ncparent)->ipdata; 1761 /* nipdata = &nip->chain->data->ipdata;XXX */ 1762 bytes = strlen(ap->a_target); 1763 1764 if (bytes <= HAMMER2_EMBEDDED_BYTES) { 1765 KKASSERT(nipdata->op_flags & 1766 HAMMER2_OPFLAG_DIRECTDATA); 1767 bcopy(ap->a_target, nipdata->u.data, bytes); 1768 nipdata->size = bytes; 1769 nip->size = bytes; 1770 hammer2_cluster_modsync(ncparent); 1771 hammer2_inode_unlock(nip, ncparent); 1772 /* nipdata = NULL; not needed */ 1773 } else { 1774 hammer2_inode_unlock(nip, ncparent); 1775 /* nipdata = NULL; not needed */ 1776 bzero(&auio, sizeof(auio)); 1777 bzero(&aiov, sizeof(aiov)); 1778 auio.uio_iov = &aiov; 1779 auio.uio_segflg = UIO_SYSSPACE; 1780 auio.uio_rw = UIO_WRITE; 1781 auio.uio_resid = bytes; 1782 auio.uio_iovcnt = 1; 1783 auio.uio_td = curthread; 1784 aiov.iov_base = ap->a_target; 1785 aiov.iov_len = bytes; 1786 error = hammer2_write_file(nip, &auio, IO_APPEND, 0); 1787 /* XXX handle error */ 1788 error = 0; 1789 } 1790 } else { 1791 hammer2_inode_unlock(nip, ncparent); 1792 } 1793 hammer2_trans_done(&trans); 1794 1795 /* 1796 * Finalize namecache 1797 */ 1798 if (error == 0) { 1799 cache_setunresolved(ap->a_nch); 1800 cache_setvp(ap->a_nch, *ap->a_vpp); 1801 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */ 1802 } 1803 return error; 1804 } 1805 1806 /* 1807 * hammer2_vop_nremove { nch, dvp, cred } 1808 */ 1809 static 1810 int 1811 hammer2_vop_nremove(struct vop_nremove_args *ap) 1812 { 1813 hammer2_inode_t *dip; 1814 hammer2_trans_t trans; 1815 struct namecache *ncp; 1816 const uint8_t *name; 1817 size_t name_len; 1818 int error; 1819 1820 LOCKSTART; 1821 dip = VTOI(ap->a_dvp); 1822 if (dip->pmp->ronly) { 1823 LOCKSTOP; 1824 return(EROFS); 1825 } 1826 1827 ncp = ap->a_nch->ncp; 1828 name = ncp->nc_name; 1829 name_len = ncp->nc_nlen; 1830 1831 hammer2_pfs_memory_wait(dip->pmp); 1832 hammer2_trans_init(&trans, dip->pmp, 0); 1833 error = hammer2_unlink_file(&trans, dip, name, name_len, 1834 0, NULL, ap->a_nch, -1); 1835 hammer2_run_unlinkq(&trans, dip->pmp); 1836 hammer2_trans_done(&trans); 1837 if (error == 0) 1838 cache_unlink(ap->a_nch); 1839 LOCKSTOP; 1840 return (error); 1841 } 1842 1843 /* 1844 * hammer2_vop_nrmdir { nch, dvp, cred } 1845 */ 1846 static 1847 int 1848 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) 1849 { 1850 hammer2_inode_t *dip; 1851 hammer2_trans_t trans; 1852 struct namecache *ncp; 1853 const uint8_t *name; 1854 size_t name_len; 1855 int error; 1856 1857 LOCKSTART; 1858 dip = VTOI(ap->a_dvp); 1859 if (dip->pmp->ronly) { 1860 LOCKSTOP; 1861 return(EROFS); 1862 } 1863 1864 ncp = ap->a_nch->ncp; 1865 name = ncp->nc_name; 1866 name_len = ncp->nc_nlen; 1867 1868 hammer2_pfs_memory_wait(dip->pmp); 1869 hammer2_trans_init(&trans, dip->pmp, 0); 1870 hammer2_run_unlinkq(&trans, dip->pmp); 1871 error = hammer2_unlink_file(&trans, dip, name, name_len, 1872 1, NULL, ap->a_nch, -1); 1873 hammer2_trans_done(&trans); 1874 if (error == 0) 1875 cache_unlink(ap->a_nch); 1876 LOCKSTOP; 1877 return (error); 1878 } 1879 1880 /* 1881 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1882 */ 1883 static 1884 int 1885 hammer2_vop_nrename(struct vop_nrename_args *ap) 1886 { 1887 struct namecache *fncp; 1888 struct namecache *tncp; 1889 hammer2_inode_t *cdip; 1890 hammer2_inode_t *fdip; 1891 hammer2_inode_t *tdip; 1892 hammer2_inode_t *ip; 1893 hammer2_cluster_t *cluster; 1894 hammer2_cluster_t *fdcluster; 1895 hammer2_cluster_t *tdcluster; 1896 hammer2_cluster_t *cdcluster; 1897 hammer2_trans_t trans; 1898 const uint8_t *fname; 1899 size_t fname_len; 1900 const uint8_t *tname; 1901 size_t tname_len; 1902 int error; 1903 int tnch_error; 1904 int hlink; 1905 1906 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 1907 return(EXDEV); 1908 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount) 1909 return(EXDEV); 1910 1911 fdip = VTOI(ap->a_fdvp); /* source directory */ 1912 tdip = VTOI(ap->a_tdvp); /* target directory */ 1913 1914 if (fdip->pmp->ronly) 1915 return(EROFS); 1916 1917 LOCKSTART; 1918 fncp = ap->a_fnch->ncp; /* entry name in source */ 1919 fname = fncp->nc_name; 1920 fname_len = fncp->nc_nlen; 1921 1922 tncp = ap->a_tnch->ncp; /* entry name in target */ 1923 tname = tncp->nc_name; 1924 tname_len = tncp->nc_nlen; 1925 1926 hammer2_pfs_memory_wait(tdip->pmp); 1927 hammer2_trans_init(&trans, tdip->pmp, 0); 1928 1929 /* 1930 * ip is the inode being renamed. If this is a hardlink then 1931 * ip represents the actual file and not the hardlink marker. 1932 */ 1933 ip = VTOI(fncp->nc_vp); 1934 cluster = NULL; 1935 1936 1937 /* 1938 * The common parent directory must be locked first to avoid deadlocks. 1939 * Also note that fdip and/or tdip might match cdip. 1940 * 1941 * WARNING! fdip may not match ip->pip. That is, if the source file 1942 * is already a hardlink then what we are renaming is the 1943 * hardlink pointer, not the hardlink itself. The hardlink 1944 * directory (ip->pip) will already be at a common parent 1945 * of fdrip. 1946 * 1947 * Be sure to use ip->pip when finding the common parent 1948 * against tdip or we might accidently move the hardlink 1949 * target into a subdirectory that makes it inaccessible to 1950 * other pointers. 1951 */ 1952 cdip = hammer2_inode_common_parent(ip->pip, tdip); 1953 cdcluster = hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS); 1954 fdcluster = hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS); 1955 tdcluster = hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS); 1956 1957 /* 1958 * Keep a tight grip on the inode so the temporary unlinking from 1959 * the source location prior to linking to the target location 1960 * does not cause the cluster to be destroyed. 1961 * 1962 * NOTE: To avoid deadlocks we cannot lock (ip) while we are 1963 * unlinking elements from their directories. Locking 1964 * the nlinks field does not lock the whole inode. 1965 */ 1966 hammer2_inode_ref(ip); 1967 1968 /* 1969 * Remove target if it exists. 1970 */ 1971 error = hammer2_unlink_file(&trans, tdip, tname, tname_len, 1972 -1, NULL, ap->a_tnch, -1); 1973 tnch_error = error; 1974 if (error && error != ENOENT) 1975 goto done; 1976 1977 /* 1978 * When renaming a hardlinked file we may have to re-consolidate 1979 * the location of the hardlink target. 1980 * 1981 * If ip represents a regular file the consolidation code essentially 1982 * does nothing other than return the same locked cluster that was 1983 * passed in. 1984 * 1985 * The returned cluster will be locked. 1986 * 1987 * WARNING! We do not currently have a local copy of ipdata but 1988 * we do use one later remember that it must be reloaded 1989 * on any modification to the inode, including connects. 1990 */ 1991 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS); 1992 error = hammer2_hardlink_consolidate(&trans, ip, &cluster, 1993 cdip, cdcluster, 0); 1994 if (error) 1995 goto done; 1996 1997 /* 1998 * Disconnect (fdip, fname) from the source directory. This will 1999 * disconnect (ip) if it represents a direct file. If (ip) represents 2000 * a hardlink the HARDLINK pointer object will be removed but the 2001 * hardlink will stay intact. 2002 * 2003 * Always pass nch as NULL because we intend to reconnect the inode, 2004 * so we don't want hammer2_unlink_file() to rename it to the hidden 2005 * open-but-unlinked directory. 2006 * 2007 * The target cluster may be marked DELETED but will not be destroyed 2008 * since we retain our hold on ip and cluster. 2009 * 2010 * NOTE: We pass nlinks as 0 (not -1) in order to retain the file's 2011 * link count. 2012 */ 2013 error = hammer2_unlink_file(&trans, fdip, fname, fname_len, 2014 -1, &hlink, NULL, 0); 2015 KKASSERT(error != EAGAIN); 2016 if (error) 2017 goto done; 2018 2019 /* 2020 * Reconnect ip to target directory using cluster. Chains cannot 2021 * actually be moved, so this will duplicate the cluster in the new 2022 * spot and assign it to the ip, replacing the old cluster. 2023 * 2024 * WARNING: Because recursive locks are allowed and we unlinked the 2025 * file that we have a cluster-in-hand for just above, the 2026 * cluster might have been delete-duplicated. We must 2027 * refactor the cluster. 2028 * 2029 * WARNING: Chain locks can lock buffer cache buffers, to avoid 2030 * deadlocks we want to unlock before issuing a cache_*() 2031 * op (that might have to lock a vnode). 2032 * 2033 * NOTE: Pass nlinks as 0 because we retained the link count from 2034 * the unlink, so we do not have to modify it. 2035 */ 2036 error = hammer2_inode_connect(&trans, &cluster, hlink, 2037 tdip, tdcluster, 2038 tname, tname_len, 0); 2039 if (error == 0) { 2040 KKASSERT(cluster != NULL); 2041 hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), cluster); 2042 } 2043 done: 2044 hammer2_inode_unlock(ip, cluster); 2045 hammer2_inode_unlock(tdip, tdcluster); 2046 hammer2_inode_unlock(fdip, fdcluster); 2047 hammer2_inode_unlock(cdip, cdcluster); 2048 hammer2_inode_drop(ip); 2049 hammer2_inode_drop(cdip); 2050 hammer2_run_unlinkq(&trans, fdip->pmp); 2051 hammer2_trans_done(&trans); 2052 2053 /* 2054 * Issue the namecache update after unlocking all the internal 2055 * hammer structures, otherwise we might deadlock. 2056 */ 2057 if (tnch_error == 0) { 2058 cache_unlink(ap->a_tnch); 2059 cache_setunresolved(ap->a_tnch); 2060 } 2061 if (error == 0) 2062 cache_rename(ap->a_fnch, ap->a_tnch); 2063 2064 LOCKSTOP; 2065 return (error); 2066 } 2067 2068 /* 2069 * Strategy code (async logical file buffer I/O from system) 2070 * 2071 * WARNING: The strategy code cannot safely use hammer2 transactions 2072 * as this can deadlock against vfs_sync's vfsync() call 2073 * if multiple flushes are queued. All H2 structures must 2074 * already be present and ready for the DIO. 2075 * 2076 * Reads can be initiated asynchronously, writes have to be 2077 * spooled to a separate thread for action to avoid deadlocks. 2078 */ 2079 static int hammer2_strategy_read(struct vop_strategy_args *ap); 2080 static int hammer2_strategy_write(struct vop_strategy_args *ap); 2081 static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb); 2082 2083 static 2084 int 2085 hammer2_vop_strategy(struct vop_strategy_args *ap) 2086 { 2087 struct bio *biop; 2088 struct buf *bp; 2089 int error; 2090 2091 biop = ap->a_bio; 2092 bp = biop->bio_buf; 2093 2094 switch(bp->b_cmd) { 2095 case BUF_CMD_READ: 2096 error = hammer2_strategy_read(ap); 2097 ++hammer2_iod_file_read; 2098 break; 2099 case BUF_CMD_WRITE: 2100 error = hammer2_strategy_write(ap); 2101 ++hammer2_iod_file_write; 2102 break; 2103 default: 2104 bp->b_error = error = EINVAL; 2105 bp->b_flags |= B_ERROR; 2106 biodone(biop); 2107 break; 2108 } 2109 return (error); 2110 } 2111 2112 /* 2113 * Logical buffer I/O, async read. 2114 */ 2115 static 2116 int 2117 hammer2_strategy_read(struct vop_strategy_args *ap) 2118 { 2119 struct buf *bp; 2120 struct bio *bio; 2121 struct bio *nbio; 2122 hammer2_inode_t *ip; 2123 hammer2_cluster_t *cparent; 2124 hammer2_cluster_t *cluster; 2125 hammer2_key_t key_dummy; 2126 hammer2_key_t lbase; 2127 uint8_t btype; 2128 2129 bio = ap->a_bio; 2130 bp = bio->bio_buf; 2131 ip = VTOI(ap->a_vp); 2132 nbio = push_bio(bio); 2133 2134 lbase = bio->bio_offset; 2135 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0); 2136 2137 /* 2138 * Lookup the file offset. 2139 */ 2140 cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS | 2141 HAMMER2_RESOLVE_SHARED); 2142 cluster = hammer2_cluster_lookup(cparent, &key_dummy, 2143 lbase, lbase, 2144 HAMMER2_LOOKUP_NODATA | 2145 HAMMER2_LOOKUP_SHARED); 2146 hammer2_inode_unlock(ip, cparent); 2147 2148 /* 2149 * Data is zero-fill if no cluster could be found 2150 * (XXX or EIO on a cluster failure). 2151 */ 2152 if (cluster == NULL) { 2153 bp->b_resid = 0; 2154 bp->b_error = 0; 2155 bzero(bp->b_data, bp->b_bcount); 2156 biodone(nbio); 2157 return(0); 2158 } 2159 2160 /* 2161 * Cluster elements must be type INODE or type DATA, but the 2162 * compression mode (or not) for DATA chains can be different for 2163 * each chain. This will be handled by the callback. 2164 * 2165 * If the cluster already has valid data the callback will be made 2166 * immediately/synchronously. 2167 */ 2168 btype = hammer2_cluster_type(cluster); 2169 if (btype != HAMMER2_BREF_TYPE_INODE && 2170 btype != HAMMER2_BREF_TYPE_DATA) { 2171 panic("READ PATH: hammer2_strategy_read: unknown bref type"); 2172 } 2173 hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback, 2174 nbio); 2175 return(0); 2176 } 2177 2178 /* 2179 * Read callback for hammer2_cluster_load_async(). The load function may 2180 * start several actual I/Os but will only make one callback, typically with 2181 * the first valid I/O XXX 2182 */ 2183 static 2184 void 2185 hammer2_strategy_read_callback(hammer2_iocb_t *iocb) 2186 { 2187 struct bio *bio = iocb->ptr; /* original logical buffer */ 2188 struct buf *bp = bio->bio_buf; /* original logical buffer */ 2189 hammer2_chain_t *chain; 2190 hammer2_cluster_t *cluster; 2191 hammer2_io_t *dio; 2192 char *data; 2193 int i; 2194 2195 /* 2196 * Extract data and handle iteration on I/O failure. iocb->off 2197 * is the cluster index for iteration. 2198 */ 2199 cluster = iocb->cluster; 2200 dio = iocb->dio; /* can be NULL if iocb not in progress */ 2201 2202 /* 2203 * Work to do if INPROG set, else dio is already good or dio is 2204 * NULL (which is the shortcut case if chain->data is already good). 2205 */ 2206 if (iocb->flags & HAMMER2_IOCB_INPROG) { 2207 /* 2208 * Read attempt not yet made. Issue an asynchronous read 2209 * if necessary and return, operation will chain back to 2210 * this function. 2211 */ 2212 if ((iocb->flags & HAMMER2_IOCB_READ) == 0) { 2213 if (dio->bp == NULL || 2214 (dio->bp->b_flags & B_CACHE) == 0) { 2215 if (dio->bp) { 2216 bqrelse(dio->bp); 2217 dio->bp = NULL; 2218 } 2219 iocb->flags |= HAMMER2_IOCB_READ; 2220 breadcb(dio->hmp->devvp, 2221 dio->pbase, dio->psize, 2222 hammer2_io_callback, iocb); 2223 return; 2224 } 2225 } 2226 } 2227 2228 /* 2229 * If we have a DIO it is now done, check for an error and 2230 * calculate the data. 2231 * 2232 * If there is no DIO it is an optimization by 2233 * hammer2_cluster_load_async(), the data is available in 2234 * chain->data. 2235 */ 2236 if (dio) { 2237 if (dio->bp->b_flags & B_ERROR) { 2238 i = (int)iocb->lbase + 1; 2239 if (i >= cluster->nchains) { 2240 bp->b_flags |= B_ERROR; 2241 bp->b_error = dio->bp->b_error; 2242 hammer2_io_complete(iocb); 2243 biodone(bio); 2244 hammer2_cluster_unlock(cluster); 2245 hammer2_cluster_drop(cluster); 2246 } else { 2247 hammer2_io_complete(iocb); /* XXX */ 2248 chain = cluster->array[i].chain; 2249 kprintf("hammer2: IO CHAIN-%d %p\n", i, chain); 2250 hammer2_adjreadcounter(&chain->bref, 2251 chain->bytes); 2252 iocb->chain = chain; 2253 iocb->lbase = (off_t)i; 2254 iocb->flags = 0; 2255 iocb->error = 0; 2256 hammer2_io_getblk(chain->hmp, 2257 chain->bref.data_off, 2258 chain->bytes, 2259 iocb); 2260 } 2261 return; 2262 } 2263 chain = iocb->chain; 2264 data = hammer2_io_data(dio, chain->bref.data_off); 2265 } else { 2266 /* 2267 * Special synchronous case, data present in chain->data. 2268 */ 2269 chain = iocb->chain; 2270 data = (void *)chain->data; 2271 } 2272 2273 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 2274 /* 2275 * Data is embedded in the inode (copy from inode). 2276 */ 2277 bcopy(((hammer2_inode_data_t *)data)->u.data, 2278 bp->b_data, HAMMER2_EMBEDDED_BYTES); 2279 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES, 2280 bp->b_bcount - HAMMER2_EMBEDDED_BYTES); 2281 bp->b_resid = 0; 2282 bp->b_error = 0; 2283 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 2284 /* 2285 * Data is on-media, issue device I/O and copy. 2286 * 2287 * XXX direct-IO shortcut could go here XXX. 2288 */ 2289 switch (HAMMER2_DEC_COMP(chain->bref.methods)) { 2290 case HAMMER2_COMP_LZ4: 2291 hammer2_decompress_LZ4_callback(data, chain->bytes, 2292 bio); 2293 break; 2294 case HAMMER2_COMP_ZLIB: 2295 hammer2_decompress_ZLIB_callback(data, chain->bytes, 2296 bio); 2297 break; 2298 case HAMMER2_COMP_NONE: 2299 KKASSERT(chain->bytes <= bp->b_bcount); 2300 bcopy(data, bp->b_data, chain->bytes); 2301 if (chain->bytes < bp->b_bcount) { 2302 bzero(bp->b_data + chain->bytes, 2303 bp->b_bcount - chain->bytes); 2304 } 2305 bp->b_flags |= B_NOTMETA; 2306 bp->b_resid = 0; 2307 bp->b_error = 0; 2308 break; 2309 default: 2310 panic("hammer2_strategy_read: " 2311 "unknown compression type"); 2312 } 2313 } else { 2314 /* bqrelse the dio to help stabilize the call to panic() */ 2315 if (dio) 2316 hammer2_io_bqrelse(&dio); 2317 panic("hammer2_strategy_read: unknown bref type"); 2318 } 2319 2320 /* 2321 * Once the iocb is cleaned up the DIO (if any) will no longer be 2322 * in-progress but will still have a ref. Be sure to release 2323 * the ref. 2324 */ 2325 hammer2_io_complete(iocb); /* physical management */ 2326 if (dio) /* physical dio & buffer */ 2327 hammer2_io_bqrelse(&dio); 2328 hammer2_cluster_unlock(cluster); /* cluster management */ 2329 hammer2_cluster_drop(cluster); /* cluster management */ 2330 biodone(bio); /* logical buffer */ 2331 } 2332 2333 static 2334 int 2335 hammer2_strategy_write(struct vop_strategy_args *ap) 2336 { 2337 hammer2_pfs_t *pmp; 2338 struct bio *bio; 2339 struct buf *bp; 2340 hammer2_inode_t *ip; 2341 2342 bio = ap->a_bio; 2343 bp = bio->bio_buf; 2344 ip = VTOI(ap->a_vp); 2345 pmp = ip->pmp; 2346 2347 hammer2_lwinprog_ref(pmp); 2348 hammer2_trans_assert_strategy(pmp); 2349 hammer2_mtx_ex(&pmp->wthread_mtx); 2350 if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) { 2351 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio); 2352 hammer2_mtx_unlock(&pmp->wthread_mtx); 2353 wakeup(&pmp->wthread_bioq); 2354 } else { 2355 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio); 2356 hammer2_mtx_unlock(&pmp->wthread_mtx); 2357 } 2358 hammer2_lwinprog_wait(pmp); 2359 2360 return(0); 2361 } 2362 2363 /* 2364 * hammer2_vop_ioctl { vp, command, data, fflag, cred } 2365 */ 2366 static 2367 int 2368 hammer2_vop_ioctl(struct vop_ioctl_args *ap) 2369 { 2370 hammer2_inode_t *ip; 2371 int error; 2372 2373 LOCKSTART; 2374 ip = VTOI(ap->a_vp); 2375 2376 error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data, 2377 ap->a_fflag, ap->a_cred); 2378 LOCKSTOP; 2379 return (error); 2380 } 2381 2382 static 2383 int 2384 hammer2_vop_mountctl(struct vop_mountctl_args *ap) 2385 { 2386 struct mount *mp; 2387 hammer2_pfs_t *pmp; 2388 int rc; 2389 2390 LOCKSTART; 2391 switch (ap->a_op) { 2392 case (MOUNTCTL_SET_EXPORT): 2393 mp = ap->a_head.a_ops->head.vv_mount; 2394 pmp = MPTOPMP(mp); 2395 2396 if (ap->a_ctllen != sizeof(struct export_args)) 2397 rc = (EINVAL); 2398 else 2399 rc = vfs_export(mp, &pmp->export, 2400 (const struct export_args *)ap->a_ctl); 2401 break; 2402 default: 2403 rc = vop_stdmountctl(ap); 2404 break; 2405 } 2406 LOCKSTOP; 2407 return (rc); 2408 } 2409 2410 /* 2411 * This handles unlinked open files after the vnode is finally dereferenced. 2412 * To avoid deadlocks it cannot be called from the normal vnode recycling 2413 * path, so we call it (1) after a unlink, rmdir, or rename, (2) on every 2414 * flush, and (3) on umount. 2415 */ 2416 void 2417 hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfs_t *pmp) 2418 { 2419 const hammer2_inode_data_t *ripdata; 2420 hammer2_inode_unlink_t *ipul; 2421 hammer2_inode_t *ip; 2422 hammer2_cluster_t *cluster; 2423 hammer2_cluster_t *cparent; 2424 2425 if (TAILQ_EMPTY(&pmp->unlinkq)) 2426 return; 2427 2428 LOCKSTART; 2429 hammer2_spin_ex(&pmp->list_spin); 2430 while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) { 2431 TAILQ_REMOVE(&pmp->unlinkq, ipul, entry); 2432 hammer2_spin_unex(&pmp->list_spin); 2433 ip = ipul->ip; 2434 kfree(ipul, pmp->minode); 2435 2436 cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS); 2437 ripdata = &hammer2_cluster_rdata(cluster)->ipdata; 2438 if (hammer2_debug & 0x400) { 2439 kprintf("hammer2: unlink on reclaim: %s refs=%d\n", 2440 ripdata->filename, ip->refs); 2441 } 2442 2443 /* 2444 * NOTE: Due to optimizations to avoid I/O on the inode for 2445 * the last unlink, ripdata->nlinks is not necessarily 2446 * 0 here. 2447 */ 2448 /* KKASSERT(ripdata->nlinks == 0); (see NOTE) */ 2449 cparent = hammer2_cluster_parent(cluster); 2450 hammer2_cluster_delete(trans, cparent, cluster, 2451 HAMMER2_DELETE_PERMANENT); 2452 hammer2_cluster_unlock(cparent); 2453 hammer2_cluster_drop(cparent); 2454 hammer2_inode_unlock(ip, cluster); /* inode lock */ 2455 hammer2_inode_drop(ip); /* ipul ref */ 2456 2457 hammer2_spin_ex(&pmp->list_spin); 2458 } 2459 hammer2_spin_unex(&pmp->list_spin); 2460 LOCKSTOP; 2461 } 2462 2463 2464 /* 2465 * KQFILTER 2466 */ 2467 static void filt_hammer2detach(struct knote *kn); 2468 static int filt_hammer2read(struct knote *kn, long hint); 2469 static int filt_hammer2write(struct knote *kn, long hint); 2470 static int filt_hammer2vnode(struct knote *kn, long hint); 2471 2472 static struct filterops hammer2read_filtops = 2473 { FILTEROP_ISFD | FILTEROP_MPSAFE, 2474 NULL, filt_hammer2detach, filt_hammer2read }; 2475 static struct filterops hammer2write_filtops = 2476 { FILTEROP_ISFD | FILTEROP_MPSAFE, 2477 NULL, filt_hammer2detach, filt_hammer2write }; 2478 static struct filterops hammer2vnode_filtops = 2479 { FILTEROP_ISFD | FILTEROP_MPSAFE, 2480 NULL, filt_hammer2detach, filt_hammer2vnode }; 2481 2482 static 2483 int 2484 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap) 2485 { 2486 struct vnode *vp = ap->a_vp; 2487 struct knote *kn = ap->a_kn; 2488 2489 switch (kn->kn_filter) { 2490 case EVFILT_READ: 2491 kn->kn_fop = &hammer2read_filtops; 2492 break; 2493 case EVFILT_WRITE: 2494 kn->kn_fop = &hammer2write_filtops; 2495 break; 2496 case EVFILT_VNODE: 2497 kn->kn_fop = &hammer2vnode_filtops; 2498 break; 2499 default: 2500 return (EOPNOTSUPP); 2501 } 2502 2503 kn->kn_hook = (caddr_t)vp; 2504 2505 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 2506 2507 return(0); 2508 } 2509 2510 static void 2511 filt_hammer2detach(struct knote *kn) 2512 { 2513 struct vnode *vp = (void *)kn->kn_hook; 2514 2515 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 2516 } 2517 2518 static int 2519 filt_hammer2read(struct knote *kn, long hint) 2520 { 2521 struct vnode *vp = (void *)kn->kn_hook; 2522 hammer2_inode_t *ip = VTOI(vp); 2523 off_t off; 2524 2525 if (hint == NOTE_REVOKE) { 2526 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2527 return(1); 2528 } 2529 off = ip->size - kn->kn_fp->f_offset; 2530 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX; 2531 if (kn->kn_sfflags & NOTE_OLDAPI) 2532 return(1); 2533 return (kn->kn_data != 0); 2534 } 2535 2536 2537 static int 2538 filt_hammer2write(struct knote *kn, long hint) 2539 { 2540 if (hint == NOTE_REVOKE) 2541 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2542 kn->kn_data = 0; 2543 return (1); 2544 } 2545 2546 static int 2547 filt_hammer2vnode(struct knote *kn, long hint) 2548 { 2549 if (kn->kn_sfflags & hint) 2550 kn->kn_fflags |= hint; 2551 if (hint == NOTE_REVOKE) { 2552 kn->kn_flags |= (EV_EOF | EV_NODATA); 2553 return (1); 2554 } 2555 return (kn->kn_fflags != 0); 2556 } 2557 2558 /* 2559 * FIFO VOPS 2560 */ 2561 static 2562 int 2563 hammer2_vop_markatime(struct vop_markatime_args *ap) 2564 { 2565 hammer2_inode_t *ip; 2566 struct vnode *vp; 2567 2568 vp = ap->a_vp; 2569 ip = VTOI(vp); 2570 2571 if (ip->pmp->ronly) 2572 return(EROFS); 2573 return(0); 2574 } 2575 2576 static 2577 int 2578 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap) 2579 { 2580 int error; 2581 2582 error = VOCALL(&fifo_vnode_vops, &ap->a_head); 2583 if (error) 2584 error = hammer2_vop_kqfilter(ap); 2585 return(error); 2586 } 2587 2588 /* 2589 * VOPS vector 2590 */ 2591 struct vop_ops hammer2_vnode_vops = { 2592 .vop_default = vop_defaultop, 2593 .vop_fsync = hammer2_vop_fsync, 2594 .vop_getpages = vop_stdgetpages, 2595 .vop_putpages = vop_stdputpages, 2596 .vop_access = hammer2_vop_access, 2597 .vop_advlock = hammer2_vop_advlock, 2598 .vop_close = hammer2_vop_close, 2599 .vop_nlink = hammer2_vop_nlink, 2600 .vop_ncreate = hammer2_vop_ncreate, 2601 .vop_nsymlink = hammer2_vop_nsymlink, 2602 .vop_nremove = hammer2_vop_nremove, 2603 .vop_nrmdir = hammer2_vop_nrmdir, 2604 .vop_nrename = hammer2_vop_nrename, 2605 .vop_getattr = hammer2_vop_getattr, 2606 .vop_setattr = hammer2_vop_setattr, 2607 .vop_readdir = hammer2_vop_readdir, 2608 .vop_readlink = hammer2_vop_readlink, 2609 .vop_getpages = vop_stdgetpages, 2610 .vop_putpages = vop_stdputpages, 2611 .vop_read = hammer2_vop_read, 2612 .vop_write = hammer2_vop_write, 2613 .vop_open = hammer2_vop_open, 2614 .vop_inactive = hammer2_vop_inactive, 2615 .vop_reclaim = hammer2_vop_reclaim, 2616 .vop_nresolve = hammer2_vop_nresolve, 2617 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, 2618 .vop_nmkdir = hammer2_vop_nmkdir, 2619 .vop_nmknod = hammer2_vop_nmknod, 2620 .vop_ioctl = hammer2_vop_ioctl, 2621 .vop_mountctl = hammer2_vop_mountctl, 2622 .vop_bmap = hammer2_vop_bmap, 2623 .vop_strategy = hammer2_vop_strategy, 2624 .vop_kqfilter = hammer2_vop_kqfilter 2625 }; 2626 2627 struct vop_ops hammer2_spec_vops = { 2628 .vop_default = vop_defaultop, 2629 .vop_fsync = hammer2_vop_fsync, 2630 .vop_read = vop_stdnoread, 2631 .vop_write = vop_stdnowrite, 2632 .vop_access = hammer2_vop_access, 2633 .vop_close = hammer2_vop_close, 2634 .vop_markatime = hammer2_vop_markatime, 2635 .vop_getattr = hammer2_vop_getattr, 2636 .vop_inactive = hammer2_vop_inactive, 2637 .vop_reclaim = hammer2_vop_reclaim, 2638 .vop_setattr = hammer2_vop_setattr 2639 }; 2640 2641 struct vop_ops hammer2_fifo_vops = { 2642 .vop_default = fifo_vnoperate, 2643 .vop_fsync = hammer2_vop_fsync, 2644 #if 0 2645 .vop_read = hammer2_vop_fiforead, 2646 .vop_write = hammer2_vop_fifowrite, 2647 #endif 2648 .vop_access = hammer2_vop_access, 2649 #if 0 2650 .vop_close = hammer2_vop_fifoclose, 2651 #endif 2652 .vop_markatime = hammer2_vop_markatime, 2653 .vop_getattr = hammer2_vop_getattr, 2654 .vop_inactive = hammer2_vop_inactive, 2655 .vop_reclaim = hammer2_vop_reclaim, 2656 .vop_setattr = hammer2_vop_setattr, 2657 .vop_kqfilter = hammer2_vop_fifokqfilter 2658 }; 2659 2660