/*
 * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Kernel Filesystem interface
 *
 * NOTE! local ipdata pointers must be reloaded on any modifying operation
 *	 to the inode as its underlying chain may have changed.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>
#include <sys/objcache.h>
#include <sys/event.h>
#include <sys/file.h>
#include <vfs/fifofs/fifo.h>

#include "hammer2.h"
#include "hammer2_lz4.h"

#include "zlib/hammer2_zlib.h"

#define ZFOFFSET	(-2LL)

static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
				int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
				int ioflag, int seqcount);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_decompress_LZ4_callback(hammer2_io_t *dio,
				hammer2_chain_t *arg_c,
				void *arg_p, off_t arg_o);
static void hammer2_decompress_ZLIB_callback(hammer2_io_t *dio,
				hammer2_chain_t *arg_c,
				void *arg_p, off_t arg_o);

struct objcache *cache_buffer_read;
struct objcache *cache_buffer_write;
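/*
 * Illustrative sketch (not part of the original code): the on-media
 * layout assumed by the LZ4 read path below is a native-endian int
 * holding the compressed payload size, immediately followed by the
 * LZ4 payload itself.  A hypothetical writer producing that layout,
 * assuming the LZ4_compress_limitedOutput() entry point from
 * hammer2_lz4.h, would look roughly like this:
 */
#if 0
static int
example_lz4_pack(const char *src, int src_len, char *dst, int dst_size)
{
	int clen;

	/* compress past the leading size header we reserve in dst */
	clen = LZ4_compress_limitedOutput(src, dst + sizeof(int), src_len,
					  dst_size - (int)sizeof(int));
	if (clen <= 0)
		return (-1);		/* did not compress, store raw */
	*(int *)dst = clen;		/* size header read by the callback */
	return (clen + (int)sizeof(int));
}
#endif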
/*
 * Callback used in read path in case that a block is compressed with LZ4.
 */
static
void
hammer2_decompress_LZ4_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c,
				void *arg_p, off_t arg_o)
{
	struct buf *obp;
	struct bio *obio = arg_p;
	char *bdata;
	int bytes = 1 << (int)(arg_o & HAMMER2_OFF_MASK_RADIX);

	/*
	 * If BIO_DONE is already set the device buffer was already
	 * fully valid (B_CACHE).  If it is not set then I/O was issued
	 * and we have to run I/O completion as the last bio.
	 *
	 * Nobody is waiting for our device I/O to complete, we are
	 * responsible for bqrelse()ing it which means we also have to do
	 * the equivalent of biowait() and clear BIO_DONE (which breadcb()
	 * may have set).
	 *
	 * Any preexisting device buffer should match the requested size,
	 * but due to bigblock recycling and other factors there is some
	 * fragility there, so we assert that the device buffer covers
	 * the request.
	 */
	obp = obio->bio_buf;

	if (dio->bp->b_flags & B_ERROR) {
		obp->b_flags |= B_ERROR;
		obp->b_error = dio->bp->b_error;
#if 0
	} else if (obio->bio_caller_info2.index &&
		   obio->bio_caller_info1.uvalue32 !=
		    crc32(bp->b_data, bp->b_bufsize)) {
		obp->b_flags |= B_ERROR;
		obp->b_error = EIO;
#endif
	} else {
		char *compressed_buffer;
		int *compressed_size;
		int result;

		KKASSERT(obp->b_bufsize <= HAMMER2_PBUFSIZE);
		bdata = hammer2_io_data(dio, arg_o);
		compressed_size = (int *)bdata;
		compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
		KKASSERT((unsigned int)*compressed_size <= HAMMER2_PBUFSIZE);
		result = LZ4_decompress_safe(&bdata[sizeof(int)],
					     compressed_buffer,
					     *compressed_size,
					     obp->b_bufsize);
		if (result < 0) {
			kprintf("READ PATH: Error during decompression, "
				"bio %016jx/%d log %016jx/%d\n",
				(intmax_t)dio->pbase, dio->psize,
				(intmax_t)arg_o, bytes);
			/* make sure it isn't random garbage */
			bzero(compressed_buffer, obp->b_bufsize);
		}
		KKASSERT(result <= obp->b_bufsize);
		bcopy(compressed_buffer, obp->b_data, obp->b_bufsize);
		if (result < obp->b_bufsize)
			bzero(obp->b_data + result, obp->b_bufsize - result);
		objcache_put(cache_buffer_read, compressed_buffer);
		obp->b_resid = 0;
		obp->b_flags |= B_AGE;
	}
	biodone(obio);
}
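/*
 * Illustrative sketch (not part of the original code): the ZLIB
 * callback below performs a one-shot inflate with Z_FINISH.  Stripped
 * of the buffer-cache plumbing, the zlib usage pattern reduces to:
 */
#if 0
static int
example_zlib_unpack(char *in, int in_len, char *out, int out_len)
{
	z_stream strm;
	int ret;

	bzero(&strm, sizeof(strm));
	if (inflateInit(&strm) != Z_OK)
		return (-1);
	strm.next_in = in;
	strm.avail_in = in_len;
	strm.next_out = out;
	strm.avail_out = out_len;
	ret = inflate(&strm, Z_FINISH);		/* whole block in one call */
	inflateEnd(&strm);
	return ((ret == Z_STREAM_END) ? (out_len - strm.avail_out) : -1);
}
#endif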
/*
 * Callback used in read path in case that a block is compressed with ZLIB.
 * It is almost identical to the LZ4 callback, so in theory they can be
 * unified, but we didn't want to make changes in bio structure for that.
 */
static
void
hammer2_decompress_ZLIB_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c,
				 void *arg_p, off_t arg_o)
{
	struct buf *obp;
	struct bio *obio = arg_p;
	char *bdata;
	int bytes = 1 << (int)(arg_o & HAMMER2_OFF_MASK_RADIX);

	/*
	 * If BIO_DONE is already set the device buffer was already
	 * fully valid (B_CACHE).  If it is not set then I/O was issued
	 * and we have to run I/O completion as the last bio.
	 *
	 * Nobody is waiting for our device I/O to complete, we are
	 * responsible for bqrelse()ing it which means we also have to do
	 * the equivalent of biowait() and clear BIO_DONE (which breadcb()
	 * may have set).
	 *
	 * Any preexisting device buffer should match the requested size,
	 * but due to bigblock recycling and other factors there is some
	 * fragility there, so we assert that the device buffer covers
	 * the request.
	 */
	obp = obio->bio_buf;

	if (dio->bp->b_flags & B_ERROR) {
		obp->b_flags |= B_ERROR;
		obp->b_error = dio->bp->b_error;
#if 0
	} else if (obio->bio_caller_info2.index &&
		   obio->bio_caller_info1.uvalue32 !=
		    crc32(bp->b_data, bp->b_bufsize)) {
		obp->b_flags |= B_ERROR;
		obp->b_error = EIO;
#endif
	} else {
		char *compressed_buffer;
		z_stream strm_decompress;
		int result;
		int ret;

		KKASSERT(obp->b_bufsize <= HAMMER2_PBUFSIZE);
		strm_decompress.avail_in = 0;
		strm_decompress.next_in = Z_NULL;

		ret = inflateInit(&strm_decompress);

		if (ret != Z_OK)
			kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");

		bdata = hammer2_io_data(dio, arg_o);
		compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
		strm_decompress.next_in = bdata;

		/* XXX supply proper size, subset of device bp */
		strm_decompress.avail_in = bytes;
		strm_decompress.next_out = compressed_buffer;
		strm_decompress.avail_out = obp->b_bufsize;

		ret = inflate(&strm_decompress, Z_FINISH);
		if (ret != Z_STREAM_END) {
			kprintf("HAMMER2 ZLIB: Fatal error during "
				"decompression.\n");
			bzero(compressed_buffer, obp->b_bufsize);
		}
		bcopy(compressed_buffer, obp->b_data, obp->b_bufsize);
		result = obp->b_bufsize - strm_decompress.avail_out;
		if (result < obp->b_bufsize)
			bzero(obp->b_data + result, strm_decompress.avail_out);
		objcache_put(cache_buffer_read, compressed_buffer);
		obp->b_resid = 0;
		obp->b_flags |= B_AGE;
		ret = inflateEnd(&strm_decompress);
	}
	biodone(obio);
}

static __inline
void
hammer2_knote(struct vnode *vp, int flags)
{
	if (flags)
		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}

/*
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	/*
	 * Degenerate case
	 */
	if (ip == NULL) {
		vrecycle(vp);
		return (0);
	}

	/*
	 * Detect updates to the embedded data which may be synchronized by
	 * the strategy code.  Simply mark the inode modified so it gets
	 * picked up by our normal flush.
	 */
	parent = hammer2_inode_lock_ex(ip);
	KKASSERT(parent);

	/*
	 * Check for deleted inodes and recycle immediately.
	 */
	if (parent->flags & HAMMER2_CHAIN_DELETED) {
		hammer2_inode_unlock_ex(ip, parent);
		vrecycle(vp);
	} else {
		hammer2_inode_unlock_ex(ip, parent);
	}
	return (0);
}
/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
	hammer2_chain_t *chain;
	hammer2_inode_t *ip;
	hammer2_trans_t trans;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if (ip == NULL)
		return(0);

	/*
	 * Set update_hi so we can detect and propagate the DESTROYED
	 * bit in the flush code.
	 *
	 * ip->chain might be stale, correct it before checking as older
	 * versions of the chain are likely marked deleted even if the
	 * file hasn't been.  XXX ip->chain should never be stale on
	 * reclaim.
	 */
	chain = hammer2_inode_lock_ex(ip);
#if 0
	if (chain->next_parent)
		kprintf("RECLAIM DUPLINKED IP: %p ip->ch=%p ch=%p np=%p\n",
			ip, ip->chain, chain, chain->next_parent);
#endif

	/*
	 * The final close of a deleted file or directory marks it for
	 * destruction.  The DESTROYED flag allows the flusher to shortcut
	 * any modified blocks still unflushed (that is, just ignore them).
	 *
	 * HAMMER2 usually does not try to optimize the freemap by returning
	 * deleted blocks to it as it does not usually know how many snapshots
	 * might be referencing portions of the file/dir.  XXX TODO.
	 *
	 * XXX TODO - However, any modified file as-of when a snapshot is made
	 *	      cannot use this optimization as some of the modifications
	 *	      may wind up being part of the snapshot.
	 */
	vp->v_data = NULL;
	ip->vp = NULL;
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROYED);
		hammer2_trans_init(&trans, ip->pmp, NULL,
				   HAMMER2_TRANS_BUFCACHE);
		hammer2_chain_setsubmod(&trans, chain);
		spin_lock(&chain->core->cst.spin);
		if (chain->core->update_hi < trans.sync_tid)
			chain->core->update_hi = trans.sync_tid; /* needed? */
		spin_unlock(&chain->core->cst.spin);
		hammer2_trans_done(&trans);
	}

	/*
	 * NOTE! We do not attempt to flush chains here, flushing is
	 *	 really fragile and could also deadlock.
	 */
	vclrisdirty(vp);
	hammer2_inode_unlock_ex(ip, chain);	/* unlock */
	hammer2_inode_drop(ip);			/* vp ref */
	/* chain no longer referenced */
	/* chain = NULL; not needed */

	/*
	 * XXX handle background sync when ip dirty, kernel will no longer
	 * notify us regarding this inode because there is no longer a
	 * vnode attached to it.
	 */

	return (0);
}
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_trans_t trans;
	hammer2_chain_t *chain;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

#if 0
	/* XXX can't do this yet */
	hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_ISFLUSH);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
	hammer2_trans_clear_invfsync(&trans);
#endif
	hammer2_trans_init(&trans, ip->pmp, NULL, 0);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

	/*
	 * Calling chain_flush here creates a lot of duplicative
	 * COW operations due to non-optimal vnode ordering.
	 *
	 * Only do it for an actual fsync() syscall.  The other forms
	 * which call this function will eventually call chain_flush
	 * on the volume root as a catch-all, which is far more optimal.
	 */
	chain = hammer2_inode_lock_ex(ip);
	atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	vclrisdirty(vp);
	if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME))
		hammer2_inode_fsync(&trans, ip, &chain);

#if 0
	/*
	 * XXX creates discontinuity w/modify_tid
	 */
	if (ap->a_flags & VOP_FSYNC_SYSCALL) {
		hammer2_chain_flush(&trans, &chain);
	}
#endif
	hammer2_inode_unlock_ex(ip, chain);
	hammer2_trans_done(&trans);

	return (0);
}

static
int
hammer2_vop_access(struct vop_access_args *ap)
{
	hammer2_inode_t *ip = VTOI(ap->a_vp);
	hammer2_inode_data_t *ipdata;
	hammer2_chain_t *chain;
	uid_t uid;
	gid_t gid;
	int error;

	chain = hammer2_inode_lock_sh(ip);
	ipdata = &chain->data->ipdata;
	uid = hammer2_to_unix_xid(&ipdata->uid);
	gid = hammer2_to_unix_xid(&ipdata->gid);
	error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags);
	hammer2_inode_unlock_sh(ip, chain);

	return (error);
}

static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
	hammer2_inode_data_t *ipdata;
	hammer2_chain_t *chain;
	hammer2_pfsmount_t *pmp;
	hammer2_inode_t *ip;
	struct vnode *vp;
	struct vattr *vap;

	vp = ap->a_vp;
	vap = ap->a_vap;

	ip = VTOI(vp);
	pmp = ip->pmp;

	chain = hammer2_inode_lock_sh(ip);
	ipdata = &chain->data->ipdata;

	vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
	vap->va_fileid = ipdata->inum;
	vap->va_mode = ipdata->mode;
	vap->va_nlink = ipdata->nlinks;
	vap->va_uid = hammer2_to_unix_xid(&ipdata->uid);
	vap->va_gid = hammer2_to_unix_xid(&ipdata->gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->size;	/* protected by shared lock */
	vap->va_blocksize = HAMMER2_PBUFSIZE;
	vap->va_flags = ipdata->uflags;
	hammer2_time_to_timespec(ipdata->ctime, &vap->va_ctime);
	hammer2_time_to_timespec(ipdata->mtime, &vap->va_mtime);
	hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime);
	vap->va_gen = 1;
	vap->va_bytes = vap->va_size;	/* XXX */
	vap->va_type = hammer2_get_vtype(chain);
	vap->va_filerev = 0;
	vap->va_uid_uuid = ipdata->uid;
	vap->va_gid_uuid = ipdata->gid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	hammer2_inode_unlock_sh(ip, chain);

	return (0);
}
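/*
 * Illustrative sketch (not part of the original code): hammer2 stores
 * ownership as UUIDs on-media.  The getattr path above and the setattr
 * path below round-trip unix ids through the helpers used throughout
 * this file:
 */
#if 0
	uuid_t uuid;
	uid_t uid;

	hammer2_guid_to_uuid(&uuid, 1000);	/* encode uid 1000 as a uuid */
	uid = hammer2_to_unix_xid(&uuid);	/* decode, yields 1000 again */
#endif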
static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
	hammer2_inode_data_t *ipdata;
	hammer2_inode_t *ip;
	hammer2_chain_t *chain;
	hammer2_trans_t trans;
	struct vnode *vp;
	struct vattr *vap;
	int error;
	int kflags = 0;
	int domtime = 0;
	uint64_t ctime;

	vp = ap->a_vp;
	vap = ap->a_vap;
	hammer2_update_time(&ctime);

	ip = VTOI(vp);

	if (ip->pmp->ronly)
		return(EROFS);

	hammer2_chain_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, NULL, 0);
	chain = hammer2_inode_lock_ex(ip);
	ipdata = &chain->data->ipdata;
	error = 0;

	if (vap->va_flags != VNOVAL) {
		u_int32_t flags;

		flags = ipdata->uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					 hammer2_to_unix_xid(&ipdata->uid),
					 ap->a_cred);
		if (error == 0) {
			if (ipdata->uflags != flags) {
				ipdata = hammer2_chain_modify_ip(&trans, ip,
								 &chain, 0);
				ipdata->uflags = flags;
				ipdata->ctime = ctime;
				kflags |= NOTE_ATTRIB;
			}
			if (ipdata->uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	if (ipdata->uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ipdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer2_guid_to_uuid(&uuid_uid, cur_uid);
			hammer2_guid_to_uuid(&uuid_gid, cur_gid);
			if (bcmp(&uuid_uid, &ipdata->uid, sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) ||
			    ipdata->mode != cur_mode
			) {
				ipdata = hammer2_chain_modify_ip(&trans, ip,
								 &chain, 0);
				ipdata->uid = uuid_uid;
				ipdata->gid = uuid_gid;
				ipdata->mode = cur_mode;
				ipdata->ctime = ctime;
			}
			kflags |= NOTE_ATTRIB;
		}
	}

	/*
	 * Resize the file
	 */
	if (vap->va_size != VNOVAL && ip->size != vap->va_size) {
		switch(vp->v_type) {
		case VREG:
			if (vap->va_size == ip->size)
				break;
			hammer2_inode_unlock_ex(ip, chain);
			if (vap->va_size < ip->size) {
				hammer2_truncate_file(ip, vap->va_size);
			} else {
				hammer2_extend_file(ip, vap->va_size);
			}
			chain = hammer2_inode_lock_ex(ip);
			ipdata = &chain->data->ipdata;	/* RELOAD */
			domtime = 1;
			break;
		default:
			error = EINVAL;
			goto done;
		}
	}
#if 0
	/* atime not supported */
	if (vap->va_atime.tv_sec != VNOVAL) {
		ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0);
		ipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
		kflags |= NOTE_ATTRIB;
	}
#endif
	if (vap->va_mtime.tv_sec != VNOVAL) {
		ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0);
		ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
		kflags |= NOTE_ATTRIB;
		domtime = 0;
	}
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ipdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0 && ipdata->mode != cur_mode) {
			ipdata = hammer2_chain_modify_ip(&trans, ip,
							 &chain, 0);
			ipdata->mode = cur_mode;
			ipdata->ctime = ctime;
			kflags |= NOTE_ATTRIB;
		}
	}

	/*
	 * If a truncation occurred we must call inode_fsync() now in order
	 * to trim the related data chains, otherwise a later expansion can
	 * cause havoc.
	 */
	hammer2_inode_fsync(&trans, ip, &chain);
	/*
	 * Cleanup.  If domtime is set an additional inode modification
	 * must be flagged.  All other modifications will have already
	 * set INODE_MODIFIED and called vsetisdirty().
	 */
done:
	if (domtime) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
					   HAMMER2_INODE_MTIME);
		vsetisdirty(ip->vp);
	}
	hammer2_inode_unlock_ex(ip, chain);
	hammer2_trans_done(&trans);
	hammer2_knote(ip->vp, kflags);

	return (error);
}
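/*
 * Illustrative sketch (not part of the original code): directory
 * offsets/cookies returned by the readdir code below are the directory
 * hash keys with bit 63 (HAMMER2_DIRHASH_VISIBLE) stripped, so userland
 * only ever sees positive offsets; offsets 0 and 1 are reserved for the
 * artificial "." and ".." entries.  The two halves of the round trip:
 */
#if 0
	saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;	/* to user */
	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;		/* to lookup */
#endif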
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	hammer2_inode_data_t *ipdata;
	hammer2_inode_t *ip;
	hammer2_inode_t *xip;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_chain_t *xchain;
	hammer2_tid_t inum;
	hammer2_key_t key_next;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int cache_index = -1;
	int ncookies;
	int error;
	int dtype;
	int r;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	/*
	 * Set up directory entry cookies if requested
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	parent = hammer2_inode_lock_sh(ip);
	ipdata = &parent->data->ipdata;

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	error = 0;
	chain = (void *)(intptr_t)-1;	/* non-NULL for early goto done case */

	if (saveoff == 0) {
		inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		/*
		 * Be careful with lockorder when accessing ".."
		 *
		 * (ip is the current dir. xip is the parent dir).
		 */
		inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
		while (ip->pip != NULL && ip != ip->pmp->iroot) {
			xip = ip->pip;
			hammer2_inode_ref(xip);
			hammer2_inode_unlock_sh(ip, parent);
			xchain = hammer2_inode_lock_sh(xip);
			parent = hammer2_inode_lock_sh(ip);
			hammer2_inode_drop(xip);
			if (xip == ip->pip) {
				inum = xchain->data->ipdata.inum &
				       HAMMER2_DIRHASH_USERMSK;
				hammer2_inode_unlock_sh(xip, xchain);
				break;
			}
			hammer2_inode_unlock_sh(xip, xchain);
		}
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;

	/*
	 * parent is the inode chain, already locked for us.  Don't
	 * double lock shared locks as this will screw up upgrades.
	 */
	if (error) {
		goto done;
	}
	chain = hammer2_chain_lookup(&parent, &key_next, lkey, lkey,
				     &cache_index, HAMMER2_LOOKUP_SHARED);
	if (chain == NULL) {
		chain = hammer2_chain_lookup(&parent, &key_next,
					     lkey, (hammer2_key_t)-1,
					     &cache_index,
					     HAMMER2_LOOKUP_SHARED);
	}
	while (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
			dtype = hammer2_get_dtype(chain);
			saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
			r = vop_write_dirent(&error, uio,
					     chain->data->ipdata.inum &
					      HAMMER2_DIRHASH_USERMSK,
					     dtype,
					     chain->data->ipdata.name_len,
					     chain->data->ipdata.filename);
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else {
			/* XXX chain error */
			kprintf("bad chain type readdir %d\n",
				chain->bref.type);
		}

		/*
		 * Keys may not be returned in order so once we have a
		 * placemarker (chain) the scan must allow the full range
		 * or some entries will be missed.
		 */
		chain = hammer2_chain_next(&parent, chain, &key_next,
					   key_next, (hammer2_key_t)-1,
					   &cache_index,
					   HAMMER2_LOOKUP_SHARED);
		if (chain) {
			saveoff = (chain->bref.key &
				   HAMMER2_DIRHASH_USERMSK) + 1;
		} else {
			saveoff = (hammer2_key_t)-1;
		}
		if (cookie_index == ncookies)
			break;
	}
	if (chain)
		hammer2_chain_unlock(chain);
done:
	hammer2_inode_unlock_sh(ip, parent);
	if (ap->a_eofflag)
		*ap->a_eofflag = (chain == NULL);
	uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
	if (error && cookie_index == 0) {
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	return (error);
}

/*
 * hammer2_vop_readlink { vp, uio, cred }
 */
static
int
hammer2_vop_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp;
	hammer2_inode_t *ip;
	int error;

	vp = ap->a_vp;
	if (vp->v_type != VLNK)
		return (EINVAL);
	ip = VTOI(vp);

	error = hammer2_read_file(ip, ap->a_uio, 0);
	return (error);
}

static
int
hammer2_vop_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	hammer2_inode_t *ip;
	struct uio *uio;
	int error;
	int seqcount;
	int bigread;

	/*
	 * Read operations supported on this vnode?
	 */
	vp = ap->a_vp;
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Misc
	 */
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;

	seqcount = ap->a_ioflag >> 16;
	bigread = (uio->uio_resid > 100 * 1024 * 1024);

	error = hammer2_read_file(ip, uio, seqcount);
	return (error);
}
static
int
hammer2_vop_write(struct vop_write_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_trans_t trans;
	thread_t td;
	struct vnode *vp;
	struct uio *uio;
	int error;
	int seqcount;
	int bigwrite;

	/*
	 * Write operations supported on this vnode?
	 */
	vp = ap->a_vp;
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Misc
	 */
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;
	if (ip->pmp->ronly)
		return (EROFS);

	seqcount = ap->a_ioflag >> 16;
	bigwrite = (uio->uio_resid > 100 * 1024 * 1024);

	/*
	 * Check resource limit
	 */
	if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
	    uio->uio_offset + uio->uio_resid >
	     td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * The transaction interlocks against flush initiations
	 * (note: but will run concurrently with the actual flush).
	 */
	hammer2_trans_init(&trans, ip->pmp, NULL, 0);
	error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
	hammer2_trans_done(&trans);

	return (error);
}

/*
 * Perform read operations on a file or symlink given an UNLOCKED
 * inode and uio.
 *
 * The passed ip is not locked.
 */
static
int
hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
{
	hammer2_off_t size;
	struct buf *bp;
	int error;

	error = 0;

	/*
	 * UIO read loop.
	 */
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	size = ip->size;
	ccms_thread_unlock(&ip->topo_cst);

	while (uio->uio_resid > 0 && uio->uio_offset < size) {
		hammer2_key_t lbase;
		hammer2_key_t leof;
		int lblksize;
		int loff;
		int n;

		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, &leof);

		error = cluster_read(ip->vp, leof, lbase, lblksize,
				     uio->uio_resid, seqcount * BKVASIZE,
				     &bp);

		if (error)
			break;
		loff = (int)(uio->uio_offset - lbase);
		n = lblksize - loff;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > size - uio->uio_offset)
			n = (int)(size - uio->uio_offset);
		bp->b_flags |= B_AGE;
		uiomove((char *)bp->b_data + loff, n, uio);
		bqrelse(bp);
	}
	return (error);
}
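/*
 * Illustrative sketch (not part of the original code): each pass of
 * the UIO loops in this file clamps the transfer size three ways.
 * E.g. with lblksize 65536, uio_offset 70000 (loff 4464), uio_resid
 * 200000 and file size 100000, n starts at 61072 and is clamped to
 * 30000 by EOF:
 */
#if 0
	n = lblksize - loff;			/* rest of logical block */
	if (n > uio->uio_resid)			/* rest of the request */
		n = uio->uio_resid;
	if (n > size - uio->uio_offset)		/* rest of the file (EOF) */
		n = (int)(size - uio->uio_offset);
#endif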
/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip,
		   struct uio *uio, int ioflag, int seqcount)
{
	hammer2_key_t old_eof;
	hammer2_key_t new_eof;
	struct buf *bp;
	int kflags;
	int error;
	int modified;

	/*
	 * Setup if append
	 */
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	if (ioflag & IO_APPEND)
		uio->uio_offset = ip->size;
	old_eof = ip->size;
	ccms_thread_unlock(&ip->topo_cst);

	/*
	 * Extend the file if necessary.  If the write fails at some point
	 * we will truncate it back down to cover as much as we were able
	 * to write.
	 *
	 * Doing this now makes it easier to calculate buffer sizes in
	 * the loop.
	 */
	kflags = 0;
	error = 0;
	modified = 0;

	if (uio->uio_offset + uio->uio_resid > old_eof) {
		new_eof = uio->uio_offset + uio->uio_resid;
		modified = 1;
		hammer2_extend_file(ip, new_eof);
		kflags |= NOTE_EXTEND;
	} else {
		new_eof = old_eof;
	}

	/*
	 * UIO write loop
	 */
	while (uio->uio_resid > 0) {
		hammer2_key_t lbase;
		int trivial;
		int endofblk;
		int lblksize;
		int loff;
		int n;

		/*
		 * Don't allow the buffer build to blow out the buffer
		 * cache.
		 */
		if ((ioflag & IO_RECURSE) == 0)
			bwillwrite(HAMMER2_PBUFSIZE);

		/*
		 * This nominally tells us how much we can cluster and
		 * what the logical buffer size needs to be.  Currently
		 * we don't try to cluster the write and just handle one
		 * block at a time.
		 */
		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, NULL);
		loff = (int)(uio->uio_offset - lbase);

		KKASSERT(lblksize <= 65536);

		/*
		 * Calculate bytes to copy this transfer and whether the
		 * copy completely covers the buffer or not.
		 */
		trivial = 0;
		n = lblksize - loff;
		if (n > uio->uio_resid) {
			n = uio->uio_resid;
			if (loff == lbase && uio->uio_offset + n == new_eof)
				trivial = 1;
			endofblk = 0;
		} else {
			if (loff == 0)
				trivial = 1;
			endofblk = 1;
		}

		/*
		 * Get the buffer
		 */
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ip->vp, lbase, lblksize, &bp);
			}
		} else if (trivial) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 *
			 * (The strategy code will detect zero-fill physical
			 * blocks for this case).
			 */
			error = bread(ip->vp, lbase, lblksize, &bp);
			if (error == 0)
				bheavy(bp);
		}

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Ok, copy the data in
		 */
		error = uiomove(bp->b_data + loff, n, uio);
		kflags |= NOTE_WRITE;
		modified = 1;
		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
		 *	    with IO_SYNC or IO_ASYNC set.  These writes
		 *	    must be handled as the pageout daemon expects.
		 */
		if (ioflag & IO_SYNC) {
			bwrite(bp);
		} else if ((ioflag & IO_DIRECT) && endofblk) {
			bawrite(bp);
		} else if (ioflag & IO_ASYNC) {
			bawrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	/*
	 * Cleanup.  If we extended the file EOF but failed to write through,
	 * the entire write is a failure and we have to back up.
	 */
	if (error && new_eof != old_eof) {
		hammer2_truncate_file(ip, old_eof);
	} else if (modified) {
		ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
		hammer2_update_time(&ip->mtime);
		atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME);
		ccms_thread_unlock(&ip->topo_cst);
	}
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	hammer2_knote(ip->vp, kflags);
	vsetisdirty(ip->vp);

	return error;
}

/*
 * Truncate the size of a file.  The inode must not be locked.
 *
 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED
 */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	int nblksize;

	if (ip->vp) {
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvtruncbuf(ip->vp, nsize,
			   nblksize, (int)nsize & (nblksize - 1),
			   0);
	}
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	ip->size = nsize;
	atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
	ccms_thread_unlock(&ip->topo_cst);
}

/*
 * Extend the size of a file.  The inode must not be locked.
 *
 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED
 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	hammer2_key_t osize;
	int oblksize;
	int nblksize;

	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	osize = ip->size;
	ip->size = nsize;
	ccms_thread_unlock(&ip->topo_cst);

	if (ip->vp) {
		oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvextendbuf(ip->vp,
			    osize, nsize,
			    oblksize, nblksize,
			    -1, -1, 0);
	}
	atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
}
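/*
 * Illustrative note (not part of the original code): nvtruncbuf() as
 * used above takes the logical block size at the new EOF plus the
 * intra-block byte offset of the boundary.  E.g. truncating to 70000
 * bytes when the logical block size at that offset is 65536 passes a
 * boundary offset of 70000 & (65536 - 1) == 4464:
 */
#if 0
	nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
	nvtruncbuf(ip->vp, nsize, nblksize, (int)nsize & (nblksize - 1), 0);
#endif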
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_inode_t *dip;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_chain_t *ochain;
	hammer2_trans_t trans;
	hammer2_key_t key_next;
	hammer2_key_t lhc;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error = 0;
	int cache_index = -1;
	struct vnode *vp;

	dip = VTOI(ap->a_dvp);
	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	lhc = hammer2_dirhash(name, name_len);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 */
	parent = hammer2_inode_lock_sh(dip);
	chain = hammer2_chain_lookup(&parent, &key_next,
				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				     &cache_index, HAMMER2_LOOKUP_SHARED);
	while (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		    name_len == chain->data->ipdata.name_len &&
		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
			break;
		}
		chain = hammer2_chain_next(&parent, chain, &key_next,
					   key_next,
					   lhc + HAMMER2_DIRHASH_LOMASK,
					   &cache_index,
					   HAMMER2_LOOKUP_SHARED);
	}
	hammer2_inode_unlock_sh(dip, parent);

	/*
	 * If the inode represents a forwarding entry for a hardlink we have
	 * to locate the actual inode.  The original ip is saved for possible
	 * deconsolidation.  (ip) will only be set to non-NULL when we have
	 * to locate the real file via a hardlink.  ip will be referenced but
	 * not locked in that situation.  chain is passed in locked and
	 * returned locked.
	 *
	 * XXX what kind of chain lock?
	 */
	ochain = NULL;
	if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
		error = hammer2_hardlink_find(dip, &chain, &ochain);
		if (error) {
			kprintf("hammer2: unable to find hardlink\n");
			if (chain) {
				hammer2_chain_unlock(chain);
				chain = NULL;
			}
			goto failed;
		}
	}

	/*
	 * Deconsolidate any hardlink whose nlinks == 1.  Ignore errors.
	 * If an error occurs chain and ip are left alone.
	 *
	 * XXX upgrade shared lock?
	 */
	if (ochain && chain &&
	    chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) {
		kprintf("hammer2: need to unconsolidate hardlink for %s\n",
			chain->data->ipdata.filename);
		/* XXX retain shared lock on dip? (currently not held) */
		hammer2_trans_init(&trans, dip->pmp, NULL, 0);
		hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain);
		hammer2_trans_done(&trans);
	}

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *	 entry to NULL, otherwise we just return the error and
	 *	 leave the namecache unresolved.
	 *
	 * NOTE: multiple hammer2_inode structures can be aliased to the
	 *	 same chain element, for example for hardlinks.  This
	 *	 use case does not 'reattach' inode associations that
	 *	 might already exist, but always allocates a new one.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *	    but chain was locked shared.  inode_unlock_ex()
	 *	    will handle it properly.
	 */
	if (chain) {
		ip = hammer2_inode_get(dip->pmp, dip, chain);
		vp = hammer2_igetv(ip, &error);
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
		} else if (error == ENOENT) {
			cache_setvp(ap->a_nch, NULL);
		}
		hammer2_inode_unlock_ex(ip, chain);
		/*
		 * The vp should not be released until after we've disposed
		 * of our locks, because it might cause vop_inactive() to
		 * be called.
		 */
		if (vp)
			vrele(vp);
	} else {
		error = ENOENT;
		cache_setvp(ap->a_nch, NULL);
	}
failed:
	KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
		("resolve error %d/%p chain %p ap %p\n",
		 error, ap->a_nch->ncp->nc_vp, chain, ap));
	if (ochain)
		hammer2_chain_drop(ochain);
	return error;
}

static
int
hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	int error;

	dip = VTOI(ap->a_dvp);

	if ((ip = dip->pip) == NULL) {
		*ap->a_vpp = NULL;
		return ENOENT;
	}
	parent = hammer2_inode_lock_ex(ip);
	*ap->a_vpp = hammer2_igetv(ip, &error);
	hammer2_inode_unlock_ex(ip, parent);

	return error;
}

static
int
hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_trans_t trans;
	hammer2_chain_t *chain;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	hammer2_chain_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len, &chain, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
	} else {
		*ap->a_vpp = hammer2_igetv(nip, &error);
		hammer2_inode_unlock_ex(nip, chain);
	}
	hammer2_trans_done(&trans);

	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
	}
	return error;
}

/*
 * Return the largest contiguous physical disk range for the logical
 * request, in bytes.
 *
 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
 *
 * Basically disabled, the logical buffer write thread has to deal with
 * buffers one-at-a-time.
 */
static
int
hammer2_vop_bmap(struct vop_bmap_args *ap)
{
	*ap->a_doffsetp = NOOFFSET;
	if (ap->a_runp)
		*ap->a_runp = 0;
	if (ap->a_runb)
		*ap->a_runb = 0;
	return (EOPNOTSUPP);
}

static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	return vop_stdopen(ap);
}

/*
 * hammer2_vop_advlock { vp, id, op, fl, flags }
 */
static
int
hammer2_vop_advlock(struct vop_advlock_args *ap)
{
	hammer2_inode_t *ip = VTOI(ap->a_vp);
	hammer2_chain_t *parent;
	hammer2_off_t size;

	parent = hammer2_inode_lock_sh(ip);
	size = parent->data->ipdata.size;
	hammer2_inode_unlock_sh(ip, parent);
	return (lf_advlock(ap, &ip->advlock, size));
}

static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	return vop_stdclose(ap);
}
/*
 * hammer2_vop_nlink { nch, dvp, vp, cred }
 *
 * Create a hardlink from (vp) to {dvp, nch}.
 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_inode_t *dip;	/* target directory to create link in */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	hammer2_chain_t *chain;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * ip represents the file being hardlinked.  The file could be a
	 * normal file or a hardlink target if it has already been hardlinked.
	 * If ip is a hardlinked target then ip->pip represents the location
	 * of the hardlinked target, NOT the location of the hardlink pointer.
	 *
	 * Bump nlinks and potentially also create or move the hardlink
	 * target in the parent directory common to (ip) and (dip).  The
	 * consolidation code can modify ip->chain and ip->pip.  The
	 * returned chain is locked.
	 */
	ip = VTOI(ap->a_vp);
	hammer2_chain_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_NEWINODE);

	chain = hammer2_inode_lock_ex(ip);
	error = hammer2_hardlink_consolidate(&trans, ip, &chain, dip, 1);
	if (error)
		goto done;

	/*
	 * Create a directory entry connected to the specified chain.
	 * The hardlink consolidation code has already adjusted ip->pip
	 * to the common parent directory containing the actual hardlink,
	 * which may be different from dip (where we created our hardlink
	 * entry).  ip->chain always represents the actual hardlink and
	 * not any of the pointers to it.
	 */
	error = hammer2_inode_connect(&trans, 1,
				      dip, &chain,
				      name, name_len);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
done:
	hammer2_inode_unlock_ex(ip, chain);
	hammer2_trans_done(&trans);

	return error;
}
/*
 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer2_vop_ncreate(struct vop_ncreate_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_trans_t trans;
	hammer2_chain_t *nchain;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_chain_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);

	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len, &nchain, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
	} else {
		*ap->a_vpp = hammer2_igetv(nip, &error);
		hammer2_inode_unlock_ex(nip, nchain);
	}
	hammer2_trans_done(&trans);

	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
	}
	return error;
}

/*
 * hammer2_vop_nmknod { nch, dvp, vpp, cred, vap }
 */
static
int
hammer2_vop_nmknod(struct vop_nmknod_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_trans_t trans;
	hammer2_chain_t *nchain;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_chain_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);

	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len, &nchain, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
	} else {
		*ap->a_vpp = hammer2_igetv(nip, &error);
		hammer2_inode_unlock_ex(nip, nchain);
	}
	hammer2_trans_done(&trans);

	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
	}
	return error;
}
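/*
 * Illustrative note (not part of the original code): short symlink
 * targets are stored directly in the inode.  The nsymlink code below
 * embeds targets up to HAMMER2_EMBEDDED_BYTES in ipdata->u.data and
 * falls back to a regular logical-buffer write for longer ones.  In
 * outline:
 */
#if 0
	if (bytes <= HAMMER2_EMBEDDED_BYTES) {
		bcopy(ap->a_target, nipdata->u.data, bytes);	/* embedded */
		nipdata->size = bytes;
	} else {
		error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
	}
#endif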
/*
 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 */
static
int
hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_chain_t *nparent;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_chain_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);

	ap->a_vap->va_type = VLNK;	/* enforce type */

	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len, &nparent, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
		hammer2_trans_done(&trans);
		return error;
	}
	*ap->a_vpp = hammer2_igetv(nip, &error);

	/*
	 * Build the softlink (~like file data) and finalize the namecache.
	 */
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;
		hammer2_inode_data_t *nipdata;

		nipdata = &nip->chain->data->ipdata;
		bytes = strlen(ap->a_target);

		if (bytes <= HAMMER2_EMBEDDED_BYTES) {
			KKASSERT(nipdata->op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			bcopy(ap->a_target, nipdata->u.data, bytes);
			nipdata->size = bytes;
			nip->size = bytes;
			hammer2_inode_unlock_ex(nip, nparent);
		} else {
			hammer2_inode_unlock_ex(nip, nparent);
			bzero(&auio, sizeof(auio));
			bzero(&aiov, sizeof(aiov));
			auio.uio_iov = &aiov;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_WRITE;
			auio.uio_resid = bytes;
			auio.uio_iovcnt = 1;
			auio.uio_td = curthread;
			aiov.iov_base = ap->a_target;
			aiov.iov_len = bytes;
			error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
			nipdata = &nip->chain->data->ipdata; /* RELOAD */
			/* XXX handle error */
			error = 0;
		}
	} else {
		hammer2_inode_unlock_ex(nip, nparent);
	}
	hammer2_trans_done(&trans);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		/* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
	}
	return error;
}

/*
 * hammer2_vop_nremove { nch, dvp, cred }
 */
static
int
hammer2_vop_nremove(struct vop_nremove_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return(EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_chain_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, NULL, 0);
	error = hammer2_unlink_file(&trans, dip, name, name_len, 0, NULL);
	hammer2_trans_done(&trans);
	if (error == 0) {
		cache_unlink(ap->a_nch);
	}
	return (error);
}

/*
 * hammer2_vop_nrmdir { nch, dvp, cred }
 */
static
int
hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return(EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	hammer2_chain_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, NULL, 0);
	error = hammer2_unlink_file(&trans, dip, name, name_len, 1, NULL);
	hammer2_trans_done(&trans);
	if (error == 0) {
		cache_unlink(ap->a_nch);
	}
	return (error);
}
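/*
 * Illustrative note (not part of the original code): the isdir
 * argument to hammer2_unlink_file() selects what may be unlinked,
 * as the three call sites in this file show.  nremove above passes 0
 * (must not be a directory), nrmdir above passes 1 (must be a
 * directory), and nrename below passes -1 (either is acceptable):
 */
#if 0
	error = hammer2_unlink_file(&trans, dip, name, name_len, 0, NULL);
	error = hammer2_unlink_file(&trans, dip, name, name_len, 1, NULL);
	error = hammer2_unlink_file(&trans, dip, name, name_len, -1, NULL);
#endif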
/*
 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 */
static
int
hammer2_vop_nrename(struct vop_nrename_args *ap)
{
	struct namecache *fncp;
	struct namecache *tncp;
	hammer2_inode_t *fdip;
	hammer2_inode_t *tdip;
	hammer2_inode_t *ip;
	hammer2_chain_t *chain;
	hammer2_trans_t trans;
	const uint8_t *fname;
	size_t fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;
	int hlink;

	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	if (fdip->pmp->ronly)
		return(EROFS);

	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	hammer2_chain_memory_wait(tdip->pmp);
	hammer2_trans_init(&trans, tdip->pmp, NULL, 0);

	/*
	 * ip is the inode being renamed.  If this is a hardlink then
	 * ip represents the actual file and not the hardlink marker.
	 */
	ip = VTOI(fncp->nc_vp);
	chain = NULL;

	/*
	 * Keep a tight grip on the inode so the temporary unlinking from
	 * the source location prior to linking to the target location
	 * does not cause the chain to be destroyed.
	 *
	 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
	 *	 unlinking elements from their directories.  Locking
	 *	 the nlinks field does not lock the whole inode.
	 */
	hammer2_inode_ref(ip);

	/*
	 * Remove the target if it exists.
	 */
	error = hammer2_unlink_file(&trans, tdip, tname, tname_len, -1, NULL);
	if (error && error != ENOENT)
		goto done;
	cache_setunresolved(ap->a_tnch);

	/*
	 * When renaming a hardlinked file we may have to re-consolidate
	 * the location of the hardlink target.  Since the element is simply
	 * being moved, nlinks is not modified in this case.
	 *
	 * If ip represents a regular file the consolidation code essentially
	 * does nothing other than return the same locked chain that was
	 * passed in.
	 *
	 * The returned chain will be locked.
	 *
	 * WARNING! We do not currently have a local copy of ipdata but
	 *	    we do use one later; remember that it must be reloaded
	 *	    on any modification to the inode, including connects.
	 */
	chain = hammer2_inode_lock_ex(ip);
	error = hammer2_hardlink_consolidate(&trans, ip, &chain, tdip, 0);
	if (error)
		goto done;

	/*
	 * Disconnect (fdip, fname) from the source directory.  This will
	 * disconnect (ip) if it represents a direct file.  If (ip) represents
	 * a hardlink the HARDLINK pointer object will be removed but the
	 * hardlink will stay intact.
	 *
	 * The target chain may be marked DELETED but will not be destroyed
	 * since we retain our hold on ip and chain.
	 */
	error = hammer2_unlink_file(&trans, fdip, fname, fname_len, -1, &hlink);
	KKASSERT(error != EAGAIN);
	if (error)
		goto done;
	/*
	 * Reconnect ip to target directory using chain.  Chains cannot
	 * actually be moved, so this will duplicate the chain in the new
	 * spot and assign it to the ip, replacing the old chain.
	 *
	 * WARNING: Because recursive locks are allowed and we unlinked the
	 *	    file that we have a chain-in-hand for just above, the
	 *	    chain might have been delete-duplicated.  We must
	 *	    refactor the chain.
	 *
	 * WARNING: Chain locks can lock buffer cache buffers, to avoid
	 *	    deadlocks we want to unlock before issuing a cache_*()
	 *	    op (that might have to lock a vnode).
	 */
	hammer2_chain_refactor(&chain);
	error = hammer2_inode_connect(&trans, hlink,
				      tdip, &chain,
				      tname, tname_len);
	if (error == 0) {
		KKASSERT(chain != NULL);
		hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), chain);
		cache_rename(ap->a_fnch, ap->a_tnch);
	}
done:
	hammer2_inode_unlock_ex(ip, chain);
	hammer2_inode_drop(ip);
	hammer2_trans_done(&trans);

	return (error);
}

/*
 * Strategy code
 *
 * WARNING: The strategy code cannot safely use hammer2 transactions
 *	    as this can deadlock against vfs_sync's vfsync() call
 *	    if multiple flushes are queued.
 */
static int hammer2_strategy_read(struct vop_strategy_args *ap);
static int hammer2_strategy_write(struct vop_strategy_args *ap);
static void hammer2_strategy_read_callback(hammer2_io_t *dio,
				hammer2_chain_t *chain,
				void *arg_p, off_t arg_o);

static
int
hammer2_vop_strategy(struct vop_strategy_args *ap)
{
	struct bio *biop;
	struct buf *bp;
	int error;

	biop = ap->a_bio;
	bp = biop->bio_buf;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		error = hammer2_strategy_read(ap);
		++hammer2_iod_file_read;
		break;
	case BUF_CMD_WRITE:
		error = hammer2_strategy_write(ap);
		++hammer2_iod_file_write;
		break;
	default:
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(biop);
		break;
	}

	return (error);
}
static
int
hammer2_strategy_read(struct vop_strategy_args *ap)
{
	struct buf *bp;
	struct bio *bio;
	struct bio *nbio;
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t key_dummy;
	hammer2_key_t lbase;
	int cache_index = -1;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	nbio = push_bio(bio);

	lbase = bio->bio_offset;
	chain = NULL;
	KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);

	parent = hammer2_inode_lock_sh(ip);
	chain = hammer2_chain_lookup(&parent, &key_dummy,
				     lbase, lbase,
				     &cache_index,
				     HAMMER2_LOOKUP_NODATA |
				     HAMMER2_LOOKUP_SHARED);

	if (chain == NULL) {
		/*
		 * Data is zero-fill
		 */
		bp->b_resid = 0;
		bp->b_error = 0;
		bzero(bp->b_data, bp->b_bcount);
		biodone(nbio);
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Data is embedded in the inode (copy from inode).
		 */
		hammer2_chain_load_async(chain,
					 hammer2_strategy_read_callback,
					 nbio, 0);
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		/*
		 * Data is on-media, issue device I/O and copy.
		 *
		 * XXX direct-IO shortcut could go here XXX.
		 */
		if (HAMMER2_DEC_COMP(chain->bref.methods) ==
		    HAMMER2_COMP_LZ4) {
			/*
			 * Block compression is determined by bref.methods
			 */
			hammer2_blockref_t *bref;

			bref = &chain->bref;
			hammer2_io_breadcb(chain->hmp, bref->data_off,
					   chain->bytes,
					   hammer2_decompress_LZ4_callback,
					   NULL, nbio, bref->data_off);
			/* XXX async read dev blk not protected by chain lk */
			hammer2_chain_unlock(chain);
		} else if (HAMMER2_DEC_COMP(chain->bref.methods) ==
			   HAMMER2_COMP_ZLIB) {
			hammer2_blockref_t *bref;

			bref = &chain->bref;
			hammer2_io_breadcb(chain->hmp, bref->data_off,
					   chain->bytes,
					   hammer2_decompress_ZLIB_callback,
					   NULL, nbio, bref->data_off);
			/* XXX async read dev blk not protected by chain lk */
			hammer2_chain_unlock(chain);
		} else {
			hammer2_chain_load_async(chain,
						 hammer2_strategy_read_callback,
						 nbio, 0);
		}
	} else {
		panic("READ PATH: hammer2_strategy_read: unknown bref type");
		chain = NULL;
	}
	hammer2_inode_unlock_sh(ip, parent);
	return (0);
}
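/*
 * Illustrative sketch (not part of the original code): bref.methods
 * packs the compression and check algorithms into a single byte; the
 * HAMMER2_DEC_COMP() tests above are the decode half of the scheme,
 * assuming the HAMMER2_ENC_*/HAMMER2_DEC_* macros from hammer2_disk.h:
 */
#if 0
	bref->methods = HAMMER2_ENC_COMP(HAMMER2_COMP_LZ4) |
			HAMMER2_ENC_CHECK(HAMMER2_CHECK_ISCSI32);
	if (HAMMER2_DEC_COMP(bref->methods) == HAMMER2_COMP_LZ4) {
		/* dispatch to hammer2_decompress_LZ4_callback() */
	}
#endif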
		KKASSERT(chain->bytes <= bp->b_bcount);
		bcopy(data, bp->b_data, chain->bytes);
		if (chain->bytes < bp->b_bcount) {
			bzero(bp->b_data + chain->bytes,
			      bp->b_bcount - chain->bytes);
		}
		bp->b_flags |= B_NOTMETA;
		bp->b_resid = 0;
		bp->b_error = 0;
		hammer2_chain_unlock(chain);
		biodone(nbio);
	} else {
		/* bqrelse the dio to help stabilize the call to panic() */
		if (dio)
			hammer2_io_bqrelse(&dio);
		panic("hammer2_strategy_read: unknown bref type");
		/*hammer2_chain_unlock(chain);*/
		/*chain = NULL;*/
	}
}

/*
 * Write strategy.  The bio is not executed synchronously here; it is
 * queued to the per-PFS write thread and the caller throttles against
 * the count of logical writes in progress.
 */
static
int
hammer2_strategy_write(struct vop_strategy_args *ap)
{
	hammer2_pfsmount_t *pmp;
	struct bio *bio;
	struct buf *bp;
	hammer2_inode_t *ip;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	pmp = ip->pmp;

	hammer2_lwinprog_ref(pmp);
	mtx_lock(&pmp->wthread_mtx);
	if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
		/*
		 * The queue was empty so the write thread may be idle;
		 * queue the bio and wake it up.
		 */
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		mtx_unlock(&pmp->wthread_mtx);
		wakeup(&pmp->wthread_bioq);
	} else {
		/*
		 * The write thread is already running, just queue.
		 */
		bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
		mtx_unlock(&pmp->wthread_mtx);
	}
	hammer2_lwinprog_wait(pmp);

	return(0);
}

/*
 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer2_vop_ioctl(struct vop_ioctl_args *ap)
{
	hammer2_inode_t *ip;
	int error;

	ip = VTOI(ap->a_vp);

	error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
			      ap->a_fflag, ap->a_cred);
	return (error);
}

static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	hammer2_pfsmount_t *pmp;
	int rc;

	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
		mp = ap->a_head.a_ops->head.vv_mount;
		pmp = MPTOPMP(mp);

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = EINVAL;
		else
			rc = vfs_export(mp, &pmp->export,
					(const struct export_args *)ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}
	return (rc);
}

/*
 * KQFILTER
 */
static void filt_hammer2detach(struct knote *kn);
static int filt_hammer2read(struct knote *kn, long hint);
static int filt_hammer2write(struct knote *kn, long hint);
static int filt_hammer2vnode(struct knote *kn, long hint);

static struct filterops hammer2read_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2read };
static struct filterops hammer2write_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2write };
static struct filterops hammer2vnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2vnode };

static
int
hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &hammer2read_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &hammer2write_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &hammer2vnode_filtops;
		break;
	default:
		return (EOPNOTSUPP);
	}

	kn->kn_hook = (caddr_t)vp;

	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

	return(0);
}

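/*
 * For reference, a userland consumer arms these filters through the
 * standard kevent(2) interface.  A minimal sketch (not part of this
 * file; the fd and kq names are illustrative):
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
 *	       NOTE_WRITE | NOTE_DELETE, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * The hint values passed to the filt_hammer2*() routines below are
 * delivered by KNOTE() callers elsewhere in the kernel; NOTE_REVOKE
 * indicates the vnode was revoked and forces EOF.
 */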
static void
filt_hammer2detach(struct knote *kn)
{
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}

static int
filt_hammer2read(struct knote *kn, long hint)
{
	struct vnode *vp = (void *)kn->kn_hook;
	hammer2_inode_t *ip = VTOI(vp);
	off_t off;

	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
		return(1);
	}
	off = ip->size - kn->kn_fp->f_offset;
	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	if (kn->kn_sfflags & NOTE_OLDAPI)
		return(1);
	return (kn->kn_data != 0);
}

static int
filt_hammer2write(struct knote *kn, long hint)
{
	if (hint == NOTE_REVOKE)
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
	kn->kn_data = 0;
	return (1);
}

static int
filt_hammer2vnode(struct knote *kn, long hint)
{
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		return (1);
	}
	return (kn->kn_fflags != 0);
}

/*
 * FIFO VOPS
 */
static
int
hammer2_vop_markatime(struct vop_markatime_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	if (ip->pmp->ronly)
		return(EROFS);
	return(0);
}

static
int
hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	if (error)
		error = hammer2_vop_kqfilter(ap);
	return(error);
}

/*
 * VOPS vector
 */
struct vop_ops hammer2_vnode_vops = {
	.vop_default = vop_defaultop,
	.vop_fsync = hammer2_vop_fsync,
	.vop_getpages = vop_stdgetpages,
	.vop_putpages = vop_stdputpages,
	.vop_access = hammer2_vop_access,
	.vop_advlock = hammer2_vop_advlock,
	.vop_close = hammer2_vop_close,
	.vop_nlink = hammer2_vop_nlink,
	.vop_ncreate = hammer2_vop_ncreate,
	.vop_nsymlink = hammer2_vop_nsymlink,
	.vop_nremove = hammer2_vop_nremove,
	.vop_nrmdir = hammer2_vop_nrmdir,
	.vop_nrename = hammer2_vop_nrename,
	.vop_getattr = hammer2_vop_getattr,
	.vop_setattr = hammer2_vop_setattr,
	.vop_readdir = hammer2_vop_readdir,
	.vop_readlink = hammer2_vop_readlink,
	.vop_read = hammer2_vop_read,
	.vop_write = hammer2_vop_write,
	.vop_open = hammer2_vop_open,
	.vop_inactive = hammer2_vop_inactive,
	.vop_reclaim = hammer2_vop_reclaim,
	.vop_nresolve = hammer2_vop_nresolve,
	.vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
	.vop_nmkdir = hammer2_vop_nmkdir,
	.vop_nmknod = hammer2_vop_nmknod,
	.vop_ioctl = hammer2_vop_ioctl,
	.vop_mountctl = hammer2_vop_mountctl,
	.vop_bmap = hammer2_vop_bmap,
	.vop_strategy = hammer2_vop_strategy,
	.vop_kqfilter = hammer2_vop_kqfilter
};

struct vop_ops hammer2_spec_vops = {
	.vop_default = vop_defaultop,
	.vop_fsync = hammer2_vop_fsync,
	.vop_read = vop_stdnoread,
	.vop_write = vop_stdnowrite,
	.vop_access = hammer2_vop_access,
	.vop_close = hammer2_vop_close,
	.vop_markatime = hammer2_vop_markatime,
	.vop_getattr = hammer2_vop_getattr,
	.vop_inactive = hammer2_vop_inactive,
	.vop_reclaim = hammer2_vop_reclaim,
	.vop_setattr = hammer2_vop_setattr
};

struct vop_ops hammer2_fifo_vops = {
	.vop_default = fifo_vnoperate,
	.vop_fsync = hammer2_vop_fsync,
#if 0
	.vop_read = hammer2_vop_fiforead,
	.vop_write = hammer2_vop_fifowrite,
#endif
	.vop_access = hammer2_vop_access,
#if 0
	.vop_close = hammer2_vop_fifoclose,
#endif
	.vop_markatime = hammer2_vop_markatime,
	.vop_getattr = hammer2_vop_getattr,
	.vop_inactive = hammer2_vop_inactive,
	.vop_reclaim = hammer2_vop_reclaim,
	.vop_setattr = hammer2_vop_setattr,
	.vop_kqfilter = hammer2_vop_fifokqfilter
};
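
/*
 * These vectors are consumed at mount time.  A minimal sketch of the
 * expected registration, assuming the usual DragonFly pattern (the
 * actual mount code lives elsewhere; this is illustrative only):
 *
 *	vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops);
 *	vfs_add_vnodeops(mp, &hammer2_spec_vops, &mp->mnt_vn_spec_ops);
 *	vfs_add_vnodeops(mp, &hammer2_fifo_vops, &mp->mnt_vn_fifo_ops);
 *
 * Once registered, the kernel dispatches VOP_*() calls on HAMMER2
 * vnodes through the table matching the vnode's type.
 */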