1 /* 2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/fcntl.h> 39 #include <sys/buf.h> 40 #include <sys/proc.h> 41 #include <sys/namei.h> 42 #include <sys/mount.h> 43 #include <sys/vnode.h> 44 #include <sys/mountctl.h> 45 #include <sys/dirent.h> 46 #include <sys/uio.h> 47 48 #include "hammer2.h" 49 50 #define ZFOFFSET (-2LL) 51 52 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 53 int seqcount); 54 static int hammer2_write_file(hammer2_inode_t *ip, hammer2_chain_t **chainp, 55 struct uio *uio, int ioflag, int seqcount); 56 static hammer2_off_t hammer2_assign_physical(hammer2_inode_t *ip, 57 hammer2_key_t lbase, int lblksize, int *errorp); 58 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize); 59 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize); 60 61 static __inline 62 void 63 hammer2_knote(struct vnode *vp, int flags) 64 { 65 if (flags) 66 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 67 } 68 69 /* 70 * Last reference to a vnode is going away but it is still cached. 71 */ 72 static 73 int 74 hammer2_vop_inactive(struct vop_inactive_args *ap) 75 { 76 hammer2_chain_t *chain; 77 hammer2_inode_t *ip; 78 struct vnode *vp; 79 #if 0 80 struct hammer2_mount *hmp; 81 #endif 82 83 vp = ap->a_vp; 84 ip = VTOI(vp); 85 86 /* 87 * Degenerate case 88 */ 89 if (ip == NULL) { 90 vrecycle(vp); 91 return (0); 92 } 93 94 /* 95 * Detect updates to the embedded data which may be synchronized by 96 * the strategy code. Simply mark the inode modified so it gets 97 * picked up by our normal flush. 98 */ 99 chain = hammer2_inode_lock_ex(ip); 100 if (ip->flags & HAMMER2_INODE_DIRTYEMBED) { 101 atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED); 102 hammer2_chain_modify(ip->hmp, chain, 0); 103 } 104 105 /* 106 * Check for deleted inodes and recycle immediately. 107 */ 108 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 109 hammer2_inode_unlock_ex(ip, chain); 110 vrecycle(vp); 111 } else { 112 hammer2_inode_unlock_ex(ip, chain); 113 } 114 return (0); 115 } 116 117 /* 118 * Reclaim a vnode so that it can be reused; after the inode is 119 * disassociated, the filesystem must manage it alone. 120 */ 121 static 122 int 123 hammer2_vop_reclaim(struct vop_reclaim_args *ap) 124 { 125 hammer2_chain_t *chain; 126 hammer2_inode_t *ip; 127 hammer2_mount_t *hmp; 128 struct vnode *vp; 129 130 vp = ap->a_vp; 131 ip = VTOI(vp); 132 if (ip == NULL) 133 return(0); 134 hmp = ip->hmp; 135 136 /* 137 * Set SUBMODIFIED so we can detect and propagate the DESTROYED 138 * bit in the flush code. 139 */ 140 chain = hammer2_inode_lock_ex(ip); 141 vp->v_data = NULL; 142 ip->vp = NULL; 143 if (chain->flags & HAMMER2_CHAIN_DELETED) { 144 KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED); 145 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROYED | 146 HAMMER2_CHAIN_SUBMODIFIED); 147 } 148 hammer2_chain_flush(hmp, chain, 0); 149 kprintf("vop_reclaim vp %p ip %p refs %d\n", 150 vp, ip, ip->refs); 151 if (ip->refs > 2) /* (our lock + vp ref) */ 152 hammer2_inode_unlock_ex(ip, chain); /* unlock */ 153 else 154 hammer2_inode_put(ip, chain); /* unlock & disconnect */ 155 hammer2_inode_drop(ip); /* vp ref */ 156 157 /* 158 * XXX handle background sync when ip dirty, kernel will no longer 159 * notify us regarding this inode because there is no longer a 160 * vnode attached to it. 161 */ 162 163 return (0); 164 } 165 166 static 167 int 168 hammer2_vop_fsync(struct vop_fsync_args *ap) 169 { 170 hammer2_chain_t *chain; 171 hammer2_inode_t *ip; 172 hammer2_mount_t *hmp; 173 struct vnode *vp; 174 175 vp = ap->a_vp; 176 ip = VTOI(vp); 177 hmp = ip->hmp; 178 179 chain = hammer2_inode_lock_ex(ip); 180 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 181 182 /* 183 * Detect updates to the embedded data which may be synchronized by 184 * the strategy code. Simply mark the inode modified so it gets 185 * picked up by our normal flush. 186 */ 187 if (ip->flags & HAMMER2_INODE_DIRTYEMBED) { 188 atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED); 189 hammer2_chain_modify(hmp, chain, 0); 190 } 191 192 /* 193 * Calling chain_flush here creates a lot of duplicative 194 * COW operations due to non-optimal vnode ordering. 195 * 196 * Only do it for an actual fsync() syscall. The other forms 197 * which call this function will eventually call chain_flush 198 * on the volume root as a catch-all, which is far more optimal. 199 */ 200 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 201 if (ap->a_flags & VOP_FSYNC_SYSCALL) 202 hammer2_chain_flush(hmp, chain, 0); 203 hammer2_inode_unlock_ex(ip, chain); 204 return (0); 205 } 206 207 static 208 int 209 hammer2_vop_access(struct vop_access_args *ap) 210 { 211 hammer2_inode_t *ip = VTOI(ap->a_vp); 212 hammer2_chain_t *chain; 213 hammer2_inode_data_t *ipdata; 214 uid_t uid; 215 gid_t gid; 216 int error; 217 218 chain = hammer2_inode_lock_sh(ip); 219 ipdata = &chain->data->ipdata; 220 uid = hammer2_to_unix_xid(&ipdata->uid); 221 gid = hammer2_to_unix_xid(&ipdata->gid); 222 error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags); 223 hammer2_inode_unlock_sh(ip, chain); 224 225 return (error); 226 } 227 228 static 229 int 230 hammer2_vop_getattr(struct vop_getattr_args *ap) 231 { 232 hammer2_inode_data_t *ipdata; 233 hammer2_pfsmount_t *pmp; 234 hammer2_inode_t *ip; 235 hammer2_chain_t *chain; 236 struct vnode *vp; 237 struct vattr *vap; 238 239 vp = ap->a_vp; 240 vap = ap->a_vap; 241 242 ip = VTOI(vp); 243 pmp = ip->pmp; 244 245 chain = hammer2_inode_lock_sh(ip); 246 ipdata = &chain->data->ipdata; 247 248 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 249 vap->va_fileid = ipdata->inum; 250 vap->va_mode = ipdata->mode; 251 vap->va_nlink = ipdata->nlinks; 252 vap->va_uid = hammer2_to_unix_xid(&ipdata->uid); 253 vap->va_gid = hammer2_to_unix_xid(&ipdata->gid); 254 vap->va_rmajor = 0; 255 vap->va_rminor = 0; 256 vap->va_size = ipdata->size; 257 vap->va_blocksize = HAMMER2_PBUFSIZE; 258 vap->va_flags = ipdata->uflags; 259 hammer2_time_to_timespec(ipdata->ctime, &vap->va_ctime); 260 hammer2_time_to_timespec(ipdata->mtime, &vap->va_mtime); 261 hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime); 262 vap->va_gen = 1; 263 vap->va_bytes = vap->va_size; /* XXX */ 264 vap->va_type = hammer2_get_vtype(chain); 265 vap->va_filerev = 0; 266 vap->va_uid_uuid = ipdata->uid; 267 vap->va_gid_uuid = ipdata->gid; 268 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 269 VA_FSID_UUID_VALID; 270 271 hammer2_inode_unlock_sh(ip, chain); 272 273 return (0); 274 } 275 276 static 277 int 278 hammer2_vop_setattr(struct vop_setattr_args *ap) 279 { 280 hammer2_inode_data_t *ipdata; 281 hammer2_chain_t *chain; 282 hammer2_inode_t *ip; 283 hammer2_mount_t *hmp; 284 struct vnode *vp; 285 struct vattr *vap; 286 int error; 287 int kflags = 0; 288 int domtime = 0; 289 uint64_t ctime; 290 291 vp = ap->a_vp; 292 vap = ap->a_vap; 293 hammer2_update_time(&ctime); 294 295 ip = VTOI(vp); 296 hmp = ip->hmp; 297 298 if (hmp->ronly) 299 return(EROFS); 300 301 chain = hammer2_inode_lock_ex(ip); 302 ipdata = &chain->data->ipdata; 303 error = 0; 304 305 if (vap->va_flags != VNOVAL) { 306 u_int32_t flags; 307 308 flags = ipdata->uflags; 309 error = vop_helper_setattr_flags(&flags, vap->va_flags, 310 hammer2_to_unix_xid(&ipdata->uid), 311 ap->a_cred); 312 if (error == 0) { 313 if (ipdata->uflags != flags) { 314 hammer2_chain_modify(hmp, chain, 0); 315 ipdata->uflags = flags; 316 ipdata->ctime = ctime; 317 kflags |= NOTE_ATTRIB; 318 } 319 if (ipdata->uflags & (IMMUTABLE | APPEND)) { 320 error = 0; 321 goto done; 322 } 323 } 324 goto done; 325 } 326 if (ipdata->uflags & (IMMUTABLE | APPEND)) { 327 error = EPERM; 328 goto done; 329 } 330 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 331 mode_t cur_mode = ipdata->mode; 332 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 333 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 334 uuid_t uuid_uid; 335 uuid_t uuid_gid; 336 337 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 338 ap->a_cred, 339 &cur_uid, &cur_gid, &cur_mode); 340 if (error == 0) { 341 hammer2_guid_to_uuid(&uuid_uid, cur_uid); 342 hammer2_guid_to_uuid(&uuid_gid, cur_gid); 343 if (bcmp(&uuid_uid, &ipdata->uid, sizeof(uuid_uid)) || 344 bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) || 345 ipdata->mode != cur_mode 346 ) { 347 hammer2_chain_modify(hmp, chain, 0); 348 ipdata->uid = uuid_uid; 349 ipdata->gid = uuid_gid; 350 ipdata->mode = cur_mode; 351 ipdata->ctime = ctime; 352 } 353 kflags |= NOTE_ATTRIB; 354 } 355 } 356 357 /* 358 * Resize the file 359 */ 360 if (vap->va_size != VNOVAL && ipdata->size != vap->va_size) { 361 switch(vp->v_type) { 362 case VREG: 363 if (vap->va_size == ipdata->size) 364 break; 365 if (vap->va_size < ipdata->size) { 366 hammer2_truncate_file(ip, vap->va_size); 367 } else { 368 hammer2_extend_file(ip, vap->va_size); 369 } 370 domtime = 1; 371 break; 372 default: 373 error = EINVAL; 374 goto done; 375 } 376 } 377 #if 0 378 /* atime not supported */ 379 if (vap->va_atime.tv_sec != VNOVAL) { 380 hammer2_chain_modify(hmp, chain, 0); 381 ipdata->atime = hammer2_timespec_to_time(&vap->va_atime); 382 kflags |= NOTE_ATTRIB; 383 } 384 #endif 385 if (vap->va_mtime.tv_sec != VNOVAL) { 386 hammer2_chain_modify(hmp, chain, 0); 387 ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime); 388 kflags |= NOTE_ATTRIB; 389 } 390 if (vap->va_mode != (mode_t)VNOVAL) { 391 mode_t cur_mode = ipdata->mode; 392 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 393 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 394 395 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 396 cur_uid, cur_gid, &cur_mode); 397 if (error == 0 && ipdata->mode != cur_mode) { 398 hammer2_chain_modify(hmp, chain, 0); 399 ipdata->mode = cur_mode; 400 ipdata->ctime = ctime; 401 kflags |= NOTE_ATTRIB; 402 } 403 } 404 done: 405 hammer2_inode_unlock_ex(ip, chain); 406 return (error); 407 } 408 409 static 410 int 411 hammer2_vop_readdir(struct vop_readdir_args *ap) 412 { 413 hammer2_inode_data_t *ipdata; 414 hammer2_mount_t *hmp; 415 hammer2_inode_t *ip; 416 hammer2_inode_t *xip; 417 hammer2_chain_t *parent; 418 hammer2_chain_t *xparent; 419 hammer2_chain_t *chain; 420 hammer2_tid_t inum; 421 hammer2_key_t lkey; 422 struct uio *uio; 423 off_t *cookies; 424 off_t saveoff; 425 int cookie_index; 426 int ncookies; 427 int error; 428 int dtype; 429 int r; 430 431 ip = VTOI(ap->a_vp); 432 hmp = ip->hmp; 433 uio = ap->a_uio; 434 saveoff = uio->uio_offset; 435 436 /* 437 * Setup cookies directory entry cookies if requested 438 */ 439 if (ap->a_ncookies) { 440 ncookies = uio->uio_resid / 16 + 1; 441 if (ncookies > 1024) 442 ncookies = 1024; 443 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 444 } else { 445 ncookies = -1; 446 cookies = NULL; 447 } 448 cookie_index = 0; 449 450 parent = hammer2_inode_lock_sh(ip); 451 ipdata = &parent->data->ipdata; 452 453 /* 454 * Handle artificial entries. To ensure that only positive 64 bit 455 * quantities are returned to userland we always strip off bit 63. 456 * The hash code is designed such that codes 0x0000-0x7FFF are not 457 * used, allowing us to use these codes for articial entries. 458 * 459 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not 460 * allow '..' to cross the mount point into (e.g.) the super-root. 461 */ 462 error = 0; 463 chain = (void *)(intptr_t)-1; /* non-NULL for early goto done case */ 464 465 if (saveoff == 0) { 466 inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK; 467 r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, "."); 468 if (r) 469 goto done; 470 if (cookies) 471 cookies[cookie_index] = saveoff; 472 ++saveoff; 473 ++cookie_index; 474 if (cookie_index == ncookies) 475 goto done; 476 } 477 478 if (saveoff == 1) { 479 /* 480 * Be careful with lockorder when accessing ".." 481 * 482 * (parent is the current dir. xip is the parent dir). 483 */ 484 inum = parent->data->ipdata.inum & HAMMER2_DIRHASH_USERMSK; 485 while (ip->pip != NULL && ip != ip->pmp->iroot) { 486 xip = ip->pip; 487 hammer2_inode_ref(xip); 488 hammer2_inode_unlock_sh(ip, parent); 489 xparent = hammer2_inode_lock_sh(xip); 490 parent = hammer2_inode_lock_sh(ip); 491 hammer2_inode_drop(xip); 492 if (xip == ip->pip) { 493 inum = xparent->data->ipdata.inum & 494 HAMMER2_DIRHASH_USERMSK; 495 hammer2_inode_unlock_sh(xip, xparent); 496 break; 497 } 498 hammer2_inode_unlock_sh(xip, xparent); 499 } 500 r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, ".."); 501 if (r) 502 goto done; 503 if (cookies) 504 cookies[cookie_index] = saveoff; 505 ++saveoff; 506 ++cookie_index; 507 if (cookie_index == ncookies) 508 goto done; 509 } 510 511 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE; 512 513 /* 514 * parent is the inode chain, already locked for us. Don't 515 * double lock shared locks as this will screw up upgrades. 516 */ 517 if (error) { 518 goto done; 519 } 520 chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey, 521 HAMMER2_LOOKUP_SHARED); 522 if (chain == NULL) { 523 chain = hammer2_chain_lookup(hmp, &parent, 524 lkey, (hammer2_key_t)-1, 525 HAMMER2_LOOKUP_SHARED); 526 } 527 while (chain) { 528 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 529 dtype = hammer2_get_dtype(chain); 530 saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK; 531 r = vop_write_dirent(&error, uio, 532 chain->data->ipdata.inum & 533 HAMMER2_DIRHASH_USERMSK, 534 dtype, 535 chain->data->ipdata.name_len, 536 chain->data->ipdata.filename); 537 if (r) 538 break; 539 if (cookies) 540 cookies[cookie_index] = saveoff; 541 ++cookie_index; 542 } else { 543 /* XXX chain error */ 544 kprintf("bad chain type readdir %d\n", 545 chain->bref.type); 546 } 547 548 /* 549 * Keys may not be returned in order so once we have a 550 * placemarker (chain) the scan must allow the full range 551 * or some entries will be missed. 552 */ 553 chain = hammer2_chain_next(hmp, &parent, chain, 554 HAMMER2_DIRHASH_VISIBLE, 555 (hammer2_key_t)-1, 556 HAMMER2_LOOKUP_SHARED); 557 if (chain) { 558 saveoff = (chain->bref.key & 559 HAMMER2_DIRHASH_USERMSK) + 1; 560 } else { 561 saveoff = (hammer2_key_t)-1; 562 } 563 if (cookie_index == ncookies) 564 break; 565 } 566 if (chain) 567 hammer2_chain_unlock(hmp, chain); 568 done: 569 hammer2_inode_unlock_sh(ip, parent); 570 if (ap->a_eofflag) 571 *ap->a_eofflag = (chain == NULL); 572 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 573 if (error && cookie_index == 0) { 574 if (cookies) { 575 kfree(cookies, M_TEMP); 576 *ap->a_ncookies = 0; 577 *ap->a_cookies = NULL; 578 } 579 } else { 580 if (cookies) { 581 *ap->a_ncookies = cookie_index; 582 *ap->a_cookies = cookies; 583 } 584 } 585 return (error); 586 } 587 588 /* 589 * hammer2_vop_readlink { vp, uio, cred } 590 */ 591 static 592 int 593 hammer2_vop_readlink(struct vop_readlink_args *ap) 594 { 595 struct vnode *vp; 596 hammer2_mount_t *hmp; 597 hammer2_inode_t *ip; 598 int error; 599 600 vp = ap->a_vp; 601 if (vp->v_type != VLNK) 602 return (EINVAL); 603 ip = VTOI(vp); 604 hmp = ip->hmp; 605 606 error = hammer2_read_file(ip, ap->a_uio, 0); 607 return (error); 608 } 609 610 static 611 int 612 hammer2_vop_read(struct vop_read_args *ap) 613 { 614 struct vnode *vp; 615 hammer2_mount_t *hmp; 616 hammer2_inode_t *ip; 617 struct uio *uio; 618 int error; 619 int seqcount; 620 int bigread; 621 622 /* 623 * Read operations supported on this vnode? 624 */ 625 vp = ap->a_vp; 626 if (vp->v_type != VREG) 627 return (EINVAL); 628 629 /* 630 * Misc 631 */ 632 ip = VTOI(vp); 633 hmp = ip->hmp; 634 uio = ap->a_uio; 635 error = 0; 636 637 seqcount = ap->a_ioflag >> 16; 638 bigread = (uio->uio_resid > 100 * 1024 * 1024); 639 640 error = hammer2_read_file(ip, uio, seqcount); 641 return (error); 642 } 643 644 static 645 int 646 hammer2_vop_write(struct vop_write_args *ap) 647 { 648 hammer2_chain_t *chain; 649 hammer2_mount_t *hmp; 650 hammer2_inode_t *ip; 651 thread_t td; 652 struct vnode *vp; 653 struct uio *uio; 654 int error; 655 int seqcount; 656 int bigwrite; 657 658 /* 659 * Read operations supported on this vnode? 660 */ 661 vp = ap->a_vp; 662 if (vp->v_type != VREG) 663 return (EINVAL); 664 665 /* 666 * Misc 667 */ 668 ip = VTOI(vp); 669 hmp = ip->hmp; 670 uio = ap->a_uio; 671 error = 0; 672 if (hmp->ronly) 673 return (EROFS); 674 675 seqcount = ap->a_ioflag >> 16; 676 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 677 678 /* 679 * Check resource limit 680 */ 681 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 682 uio->uio_offset + uio->uio_resid > 683 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 684 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 685 return (EFBIG); 686 } 687 688 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 689 690 /* 691 * ip must be locked if extending the file. 692 * ip must be locked to avoid racing a truncation. 693 * 694 * ip must be marked modified, particularly because the write 695 * might wind up being copied into the embedded data area. 696 */ 697 chain = hammer2_inode_lock_ex(ip); 698 error = hammer2_write_file(ip, &chain, uio, ap->a_ioflag, seqcount); 699 hammer2_inode_unlock_ex(ip, chain); 700 return (error); 701 } 702 703 /* 704 * Perform read operations on a file or symlink given an UNLOCKED 705 * inode and uio. 706 * 707 * The passed ip is not locked. 708 */ 709 static 710 int 711 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 712 { 713 hammer2_chain_t *chain; 714 hammer2_off_t size; 715 struct buf *bp; 716 int error; 717 718 error = 0; 719 720 /* 721 * UIO read loop. 722 * 723 * We can't hold a shared lock on ip's chain across file bread's 724 * because the bread operation will itself obtain a shared lock, 725 * resulting in one thread holding 2 shared refs. This will deadlock 726 * against temporary lock upgrades. Temporary lock upgrades are 727 * needed to insert new chain structures into a parent's RB tree. 728 * 729 * We should be able to safely retain the shared lock on ip itself. 730 */ 731 chain = hammer2_inode_lock_sh(ip); 732 size = chain->data->ipdata.size; 733 hammer2_chain_unlock(ip->hmp, chain); 734 chain = NULL; 735 736 while (uio->uio_resid > 0 && uio->uio_offset < size) { 737 hammer2_key_t lbase; 738 hammer2_key_t leof; 739 int lblksize; 740 int loff; 741 int n; 742 743 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 744 &lbase, &leof); 745 746 error = cluster_read(ip->vp, leof, lbase, lblksize, 747 uio->uio_resid, seqcount * BKVASIZE, 748 &bp); 749 750 if (error) 751 break; 752 loff = (int)(uio->uio_offset - lbase); 753 n = lblksize - loff; 754 if (n > uio->uio_resid) 755 n = uio->uio_resid; 756 if (n > size - uio->uio_offset) 757 n = (int)(size - uio->uio_offset); 758 bp->b_flags |= B_AGE; 759 uiomove((char *)bp->b_data + loff, n, uio); 760 bqrelse(bp); 761 } 762 hammer2_inode_unlock_sh(ip, chain); 763 return (error); 764 } 765 766 /* 767 * Called with a locked (ip) to do the underlying write to a file or 768 * to build the symlink target. 769 */ 770 static 771 int 772 hammer2_write_file(hammer2_inode_t *ip, hammer2_chain_t **chainp, 773 struct uio *uio, 774 int ioflag, int seqcount) 775 { 776 hammer2_inode_data_t *ipdata; 777 hammer2_key_t old_eof; 778 struct buf *bp; 779 int kflags; 780 int error; 781 int modified = 0; 782 783 /* 784 * Setup if append 785 */ 786 ipdata = &ip->chain->data->ipdata; 787 if (ioflag & IO_APPEND) 788 uio->uio_offset = ipdata->size; 789 kflags = 0; 790 error = 0; 791 792 /* 793 * Extend the file if necessary. If the write fails at some point 794 * we will truncate it back down to cover as much as we were able 795 * to write. 796 * 797 * Doing this now makes it easier to calculate buffer sizes in 798 * the loop. 799 */ 800 old_eof = ipdata->size; 801 if (uio->uio_offset + uio->uio_resid > ipdata->size) { 802 modified = 1; 803 hammer2_extend_file(ip, uio->uio_offset + uio->uio_resid); 804 kflags |= NOTE_EXTEND; 805 } 806 807 /* 808 * UIO write loop 809 */ 810 while (uio->uio_resid > 0) { 811 hammer2_key_t lbase; 812 hammer2_key_t leof; 813 int trivial; 814 int lblksize; 815 int loff; 816 int n; 817 818 /* 819 * Don't allow the buffer build to blow out the buffer 820 * cache. 821 */ 822 if ((ioflag & IO_RECURSE) == 0) { 823 /* 824 * XXX should try to leave this unlocked through 825 * the whole loop 826 */ 827 hammer2_inode_unlock_ex(ip, *chainp); 828 bwillwrite(HAMMER2_PBUFSIZE); 829 *chainp = hammer2_inode_lock_ex(ip); 830 ipdata = &(*chainp)->data->ipdata; /* reload */ 831 } 832 833 /* XXX bigwrite & signal check test */ 834 835 /* 836 * This nominally tells us how much we can cluster and 837 * what the logical buffer size needs to be. Currently 838 * we don't try to cluster the write and just handle one 839 * block at a time. 840 */ 841 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 842 &lbase, &leof); 843 loff = (int)(uio->uio_offset - lbase); 844 845 /* 846 * Calculate bytes to copy this transfer and whether the 847 * copy completely covers the buffer or not. 848 */ 849 trivial = 0; 850 n = lblksize - loff; 851 if (n > uio->uio_resid) { 852 n = uio->uio_resid; 853 if (uio->uio_offset + n == ipdata->size) 854 trivial = 1; 855 } else if (loff == 0) { 856 trivial = 1; 857 } 858 859 /* 860 * Get the buffer 861 */ 862 if (uio->uio_segflg == UIO_NOCOPY) { 863 /* 864 * Issuing a write with the same data backing the 865 * buffer. Instantiate the buffer to collect the 866 * backing vm pages, then read-in any missing bits. 867 * 868 * This case is used by vop_stdputpages(). 869 */ 870 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 871 if ((bp->b_flags & B_CACHE) == 0) { 872 bqrelse(bp); 873 error = bread(ip->vp, lbase, lblksize, &bp); 874 } 875 } else if (trivial) { 876 /* 877 * Even though we are entirely overwriting the buffer 878 * we may still have to zero it out to avoid a 879 * mmap/write visibility issue. 880 */ 881 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 882 if ((bp->b_flags & B_CACHE) == 0) 883 vfs_bio_clrbuf(bp); 884 } else { 885 /* 886 * Partial overwrite, read in any missing bits then 887 * replace the portion being written. 888 * 889 * (The strategy code will detect zero-fill physical 890 * blocks for this case). 891 */ 892 error = bread(ip->vp, lbase, lblksize, &bp); 893 if (error == 0) 894 bheavy(bp); 895 } 896 897 if (error) { 898 brelse(bp); 899 break; 900 } 901 902 /* 903 * We have to assign physical storage to the buffer we intend 904 * to dirty or write now to avoid deadlocks in the strategy 905 * code later. 906 * 907 * This can return NOOFFSET for inode-embedded data. The 908 * strategy code will take care of it in that case. 909 */ 910 bp->b_bio2.bio_offset = 911 hammer2_assign_physical(ip, lbase, lblksize, &error); 912 if (error) { 913 brelse(bp); 914 break; 915 } 916 917 /* 918 * Ok, copy the data in 919 */ 920 hammer2_inode_unlock_ex(ip, *chainp); 921 error = uiomove(bp->b_data + loff, n, uio); 922 *chainp = hammer2_inode_lock_ex(ip); 923 ipdata = &(*chainp)->data->ipdata; /* reload */ 924 kflags |= NOTE_WRITE; 925 modified = 1; 926 927 if (error) { 928 brelse(bp); 929 break; 930 } 931 932 /* XXX update ip_data.mtime */ 933 934 /* 935 * Once we dirty a buffer any cached offset becomes invalid. 936 * 937 * NOTE: For cluster_write() always use the trailing block 938 * size, which is HAMMER2_PBUFSIZE. lblksize is the 939 * eof-straddling blocksize and is incorrect. 940 */ 941 bp->b_flags |= B_AGE; 942 if (ioflag & IO_SYNC) { 943 bwrite(bp); 944 } else if ((ioflag & IO_DIRECT) && loff + n == lblksize) { 945 if (bp->b_bcount == HAMMER2_PBUFSIZE) 946 bp->b_flags |= B_CLUSTEROK; 947 bdwrite(bp); 948 } else if (ioflag & IO_ASYNC) { 949 bawrite(bp); 950 } else if (hammer2_cluster_enable) { 951 if (bp->b_bcount == HAMMER2_PBUFSIZE) 952 bp->b_flags |= B_CLUSTEROK; 953 cluster_write(bp, leof, HAMMER2_PBUFSIZE, seqcount); 954 } else { 955 if (bp->b_bcount == HAMMER2_PBUFSIZE) 956 bp->b_flags |= B_CLUSTEROK; 957 bdwrite(bp); 958 } 959 } 960 961 /* 962 * Cleanup. If we extended the file EOF but failed to write through 963 * the entire write is a failure and we have to back-up. 964 */ 965 if (error && ipdata->size != old_eof) { 966 hammer2_truncate_file(ip, old_eof); 967 } else if (modified) { 968 KKASSERT(ip->chain == *chainp); 969 hammer2_chain_modify(ip->hmp, *chainp, 0); 970 hammer2_update_time(&ipdata->mtime); 971 } 972 hammer2_knote(ip->vp, kflags); 973 return error; 974 } 975 976 /* 977 * Assign physical storage to a logical block. 978 * 979 * NOOFFSET is returned if the data is inode-embedded. In this case the 980 * strategy code will simply bcopy() the data into the inode. 981 * 982 * The inode's delta_dcount is adjusted. 983 */ 984 static 985 hammer2_off_t 986 hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase, 987 int lblksize, int *errorp) 988 { 989 hammer2_mount_t *hmp; 990 hammer2_chain_t *parent; 991 hammer2_chain_t *chain; 992 hammer2_off_t pbase; 993 994 /* 995 * Locate the chain associated with lbase, return a locked chain. 996 * However, do not instantiate any data reference (which utilizes a 997 * device buffer) because we will be using direct IO via the 998 * logical buffer cache buffer. 999 */ 1000 hmp = ip->hmp; 1001 *errorp = 0; 1002 retry: 1003 parent = hammer2_inode_lock_ex(ip); 1004 chain = hammer2_chain_lookup(hmp, &parent, 1005 lbase, lbase, 1006 HAMMER2_LOOKUP_NODATA); 1007 1008 if (chain == NULL) { 1009 /* 1010 * We found a hole, create a new chain entry. 1011 * 1012 * NOTE: DATA chains are created without device backing 1013 * store (nor do we want any). 1014 */ 1015 chain = hammer2_chain_create(hmp, parent, NULL, 1016 lbase, HAMMER2_PBUFRADIX, 1017 HAMMER2_BREF_TYPE_DATA, 1018 lblksize, errorp); 1019 if (chain == NULL) { 1020 KKASSERT(*errorp == EAGAIN); /* XXX */ 1021 hammer2_inode_unlock_ex(ip, parent); 1022 goto retry; 1023 } 1024 1025 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; 1026 /*ip->delta_dcount += lblksize;*/ 1027 } else { 1028 switch (chain->bref.type) { 1029 case HAMMER2_BREF_TYPE_INODE: 1030 /* 1031 * The data is embedded in the inode. The 1032 * caller is responsible for marking the inode 1033 * modified and copying the data to the embedded 1034 * area. 1035 */ 1036 pbase = NOOFFSET; 1037 break; 1038 case HAMMER2_BREF_TYPE_DATA: 1039 if (chain->bytes != lblksize) { 1040 panic("hammer2_assign_physical: " 1041 "size mismatch %d/%d\n", 1042 lblksize, chain->bytes); 1043 } 1044 hammer2_chain_modify(hmp, chain, 1045 HAMMER2_MODIFY_OPTDATA); 1046 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; 1047 break; 1048 default: 1049 panic("hammer2_assign_physical: bad type"); 1050 /* NOT REACHED */ 1051 pbase = NOOFFSET; 1052 break; 1053 } 1054 } 1055 1056 if (chain) 1057 hammer2_chain_unlock(hmp, chain); 1058 hammer2_inode_unlock_ex(ip, parent); 1059 1060 return (pbase); 1061 } 1062 1063 /* 1064 * Truncate the size of a file. 1065 * 1066 * This routine adjusts ipdata->size smaller, destroying any related 1067 * data beyond the new EOF and potentially resizing the block straddling 1068 * the EOF. 1069 * 1070 * The inode must be locked. 1071 */ 1072 static 1073 void 1074 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1075 { 1076 hammer2_inode_data_t *ipdata; 1077 hammer2_chain_t *parent; 1078 hammer2_chain_t *chain; 1079 hammer2_mount_t *hmp = ip->hmp; 1080 hammer2_key_t lbase; 1081 hammer2_key_t leof; 1082 struct buf *bp; 1083 int loff; 1084 int error; 1085 int oblksize; 1086 int nblksize; 1087 1088 hammer2_chain_modify(hmp, ip->chain, 0); 1089 bp = NULL; 1090 ipdata = &ip->chain->data->ipdata; 1091 1092 /* 1093 * Destroy any logical buffer cache buffers beyond the file EOF. 1094 * 1095 * We call nvtruncbuf() w/ trivial == 1 to prevent it from messing 1096 * around with the buffer straddling EOF, because we need to assign 1097 * a new physical offset to it. 1098 */ 1099 if (ip->vp) { 1100 nvtruncbuf(ip->vp, nsize, 1101 HAMMER2_PBUFSIZE, (int)nsize & HAMMER2_PBUFMASK, 1102 1); 1103 } 1104 1105 /* 1106 * Setup for lookup/search 1107 */ 1108 parent = ip->chain; 1109 error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 1110 if (error) { 1111 hammer2_chain_unlock(hmp, parent); 1112 /* XXX error reporting */ 1113 return; 1114 } 1115 1116 /* 1117 * Handle the case where a chain/logical-buffer straddles the new 1118 * EOF. We told nvtruncbuf() above not to mess with the logical 1119 * buffer straddling the EOF because we need to reassign its storage 1120 * and can't let the strategy code do it for us. 1121 */ 1122 loff = (int)nsize & HAMMER2_PBUFMASK; 1123 if (loff && ip->vp) { 1124 oblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof); 1125 error = bread(ip->vp, lbase, oblksize, &bp); 1126 KKASSERT(error == 0); 1127 } 1128 ipdata->size = nsize; 1129 nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof); 1130 1131 /* 1132 * Fixup the chain element. If we have a logical buffer in-hand 1133 * we don't want to create a conflicting device buffer. 1134 */ 1135 if (loff && bp) { 1136 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase, 1137 HAMMER2_LOOKUP_NODATA); 1138 if (chain) { 1139 allocbuf(bp, nblksize); 1140 switch(chain->bref.type) { 1141 case HAMMER2_BREF_TYPE_DATA: 1142 hammer2_chain_resize(ip, chain, 1143 hammer2_allocsize(nblksize), 1144 HAMMER2_MODIFY_OPTDATA); 1145 bzero(bp->b_data + loff, nblksize - loff); 1146 bp->b_bio2.bio_offset = chain->bref.data_off & 1147 HAMMER2_OFF_MASK; 1148 break; 1149 case HAMMER2_BREF_TYPE_INODE: 1150 bzero(bp->b_data + loff, nblksize - loff); 1151 bp->b_bio2.bio_offset = NOOFFSET; 1152 break; 1153 default: 1154 panic("hammer2_truncate_file: bad type"); 1155 break; 1156 } 1157 hammer2_chain_unlock(hmp, chain); 1158 if (bp->b_bcount == HAMMER2_PBUFSIZE) 1159 bp->b_flags |= B_CLUSTEROK; 1160 bdwrite(bp); 1161 } else { 1162 /* 1163 * Destroy clean buffer w/ wrong buffer size. Retain 1164 * backing store. 1165 */ 1166 bp->b_flags |= B_RELBUF; 1167 KKASSERT(bp->b_bio2.bio_offset == NOOFFSET); 1168 KKASSERT((bp->b_flags & B_DIRTY) == 0); 1169 bqrelse(bp); 1170 } 1171 } else if (loff) { 1172 /* 1173 * WARNING: This utilizes a device buffer for the data. 1174 * 1175 * This case should not occur because file truncations without 1176 * a vnode (and hence no logical buffer cache) should only 1177 * always truncate to 0-length. 1178 */ 1179 panic("hammer2_truncate_file: non-zero truncation, no-vnode"); 1180 #if 0 1181 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase, 0); 1182 if (chain) { 1183 switch(chain->bref.type) { 1184 case HAMMER2_BREF_TYPE_DATA: 1185 hammer2_chain_resize(ip, chain, 1186 hammer2_allocsize(nblksize), 1187 0); 1188 hammer2_chain_modify(hmp, chain, 0); 1189 bzero(chain->data->buf + loff, nblksize - loff); 1190 break; 1191 case HAMMER2_BREF_TYPE_INODE: 1192 if (loff < HAMMER2_EMBEDDED_BYTES) { 1193 hammer2_chain_modify(hmp, chain, 0); 1194 bzero(chain->data->ipdata.u.data + loff, 1195 HAMMER2_EMBEDDED_BYTES - loff); 1196 } 1197 break; 1198 } 1199 hammer2_chain_unlock(hmp, chain); 1200 } 1201 #endif 1202 } 1203 1204 /* 1205 * Clean up any fragmentory VM pages now that we have properly 1206 * resized the straddling buffer. These pages are no longer 1207 * part of the buffer. 1208 */ 1209 if (ip->vp) { 1210 nvtruncbuf(ip->vp, nsize, 1211 nblksize, (int)nsize & (nblksize - 1), 1212 1); 1213 } 1214 1215 /* 1216 * Destroy any physical blocks after the new EOF point. 1217 */ 1218 lbase = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64; 1219 chain = hammer2_chain_lookup(hmp, &parent, 1220 lbase, (hammer2_key_t)-1, 1221 HAMMER2_LOOKUP_NODATA); 1222 while (chain) { 1223 /* 1224 * Degenerate embedded data case, nothing to loop on. 1225 */ 1226 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 1227 hammer2_chain_unlock(hmp, chain); 1228 break; 1229 } 1230 1231 /* 1232 * Delete physical data blocks past the file EOF. 1233 */ 1234 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 1235 /*ip->delta_dcount -= chain->bytes;*/ 1236 hammer2_chain_delete(hmp, parent, chain, 0); 1237 } 1238 /* XXX check parent if empty indirect block & delete */ 1239 chain = hammer2_chain_next(hmp, &parent, chain, 1240 lbase, (hammer2_key_t)-1, 1241 HAMMER2_LOOKUP_NODATA); 1242 } 1243 hammer2_chain_unlock(hmp, parent); 1244 } 1245 1246 /* 1247 * Extend the size of a file. The inode must be locked. 1248 * 1249 * We may have to resize the block straddling the old EOF. 1250 */ 1251 static 1252 void 1253 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize) 1254 { 1255 hammer2_inode_data_t *ipdata; 1256 hammer2_mount_t *hmp; 1257 hammer2_chain_t *parent; 1258 hammer2_chain_t *chain; 1259 struct buf *bp; 1260 hammer2_key_t osize; 1261 hammer2_key_t obase; 1262 hammer2_key_t nbase; 1263 hammer2_key_t leof; 1264 int oblksize; 1265 int nblksize; 1266 int nradix; 1267 int error; 1268 1269 KKASSERT(ip->vp); 1270 hmp = ip->hmp; 1271 1272 hammer2_chain_modify(hmp, ip->chain, 0); 1273 ipdata = &ip->chain->data->ipdata; 1274 1275 /* 1276 * Nothing to do if the direct-data case is still intact 1277 */ 1278 if ((ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) && 1279 nsize <= HAMMER2_EMBEDDED_BYTES) { 1280 ipdata->size = nsize; 1281 nvextendbuf(ip->vp, 1282 ipdata->size, nsize, 1283 0, HAMMER2_EMBEDDED_BYTES, 1284 0, (int)nsize, 1285 1); 1286 return; 1287 } 1288 1289 /* 1290 * Calculate the blocksize at the original EOF and resize the block 1291 * if necessary. Adjust the file size in the inode. 1292 */ 1293 osize = ipdata->size; 1294 oblksize = hammer2_calc_logical(ip, osize, &obase, &leof); 1295 ipdata->size = nsize; 1296 nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof); 1297 1298 /* 1299 * Do all required vnode operations, but do not mess with the 1300 * buffer straddling the orignal EOF. 1301 */ 1302 nvextendbuf(ip->vp, 1303 ipdata->size, nsize, 1304 0, nblksize, 1305 0, (int)nsize & HAMMER2_PBUFMASK, 1306 1); 1307 1308 /* 1309 * Early return if we have no more work to do. 1310 */ 1311 if (obase == nbase && oblksize == nblksize && 1312 (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) { 1313 return; 1314 } 1315 1316 /* 1317 * We have work to do, including possibly resizing the buffer 1318 * at the previous EOF point and turning off DIRECTDATA mode. 1319 */ 1320 bp = NULL; 1321 if (((int)osize & HAMMER2_PBUFMASK)) { 1322 error = bread(ip->vp, obase, oblksize, &bp); 1323 KKASSERT(error == 0); 1324 1325 if (obase != nbase) { 1326 if (oblksize != HAMMER2_PBUFSIZE) 1327 allocbuf(bp, HAMMER2_PBUFSIZE); 1328 } else { 1329 if (oblksize != nblksize) 1330 allocbuf(bp, nblksize); 1331 } 1332 } 1333 1334 /* 1335 * Disable direct-data mode by loading up a buffer cache buffer 1336 * with the data, then converting the inode data area into the 1337 * inode indirect block array area. 1338 */ 1339 if (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) { 1340 ipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA; 1341 bzero(&ipdata->u.blockset, sizeof(ipdata->u.blockset)); 1342 } 1343 1344 /* 1345 * Resize the chain element at the old EOF. 1346 */ 1347 if (((int)osize & HAMMER2_PBUFMASK)) { 1348 retry: 1349 parent = ip->chain; 1350 error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS); 1351 KKASSERT(error == 0); 1352 1353 nradix = hammer2_allocsize(nblksize); 1354 1355 chain = hammer2_chain_lookup(hmp, &parent, 1356 obase, obase, 1357 HAMMER2_LOOKUP_NODATA); 1358 if (chain == NULL) { 1359 chain = hammer2_chain_create(hmp, parent, NULL, 1360 obase, nblksize, 1361 HAMMER2_BREF_TYPE_DATA, 1362 nblksize, &error); 1363 if (chain == NULL) { 1364 KKASSERT(error == EAGAIN); 1365 hammer2_chain_unlock(hmp, parent); 1366 goto retry; 1367 } 1368 /*ip->delta_dcount += nblksize;*/ 1369 } else { 1370 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA); 1371 hammer2_chain_resize(ip, chain, nradix, 1372 HAMMER2_MODIFY_OPTDATA); 1373 } 1374 bp->b_bio2.bio_offset = chain->bref.data_off & 1375 HAMMER2_OFF_MASK; 1376 hammer2_chain_unlock(hmp, chain); 1377 if (bp->b_bcount == HAMMER2_PBUFSIZE) 1378 bp->b_flags |= B_CLUSTEROK; 1379 bdwrite(bp); 1380 hammer2_chain_unlock(hmp, parent); 1381 } 1382 } 1383 1384 static 1385 int 1386 hammer2_vop_nresolve(struct vop_nresolve_args *ap) 1387 { 1388 hammer2_inode_t *ip; 1389 hammer2_inode_t *dip; 1390 hammer2_mount_t *hmp; 1391 hammer2_chain_t *parent; 1392 hammer2_chain_t *chain; 1393 hammer2_chain_t *ochain; 1394 struct namecache *ncp; 1395 const uint8_t *name; 1396 size_t name_len; 1397 hammer2_key_t lhc; 1398 int error = 0; 1399 struct vnode *vp; 1400 1401 dip = VTOI(ap->a_dvp); 1402 hmp = dip->hmp; 1403 ncp = ap->a_nch->ncp; 1404 name = ncp->nc_name; 1405 name_len = ncp->nc_nlen; 1406 lhc = hammer2_dirhash(name, name_len); 1407 1408 /* 1409 * Note: In DragonFly the kernel handles '.' and '..'. 1410 */ 1411 parent = hammer2_inode_lock_sh(dip); 1412 chain = hammer2_chain_lookup(hmp, &parent, 1413 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1414 HAMMER2_LOOKUP_SHARED); 1415 while (chain) { 1416 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 1417 name_len == chain->data->ipdata.name_len && 1418 bcmp(name, chain->data->ipdata.filename, name_len) == 0) { 1419 break; 1420 } 1421 chain = hammer2_chain_next(hmp, &parent, chain, 1422 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1423 HAMMER2_LOOKUP_SHARED); 1424 } 1425 hammer2_inode_unlock_sh(dip, parent); 1426 1427 /* 1428 * If the inode represents a forwarding entry for a hardlink we have 1429 * to locate the actual inode. The original ip is saved for possible 1430 * deconsolidation. (ip) will only be set to non-NULL when we have 1431 * to locate the real file via a hardlink. ip will be referenced but 1432 * not locked in that situation. chain is passed in locked and 1433 * returned locked. 1434 * 1435 * XXX what kind of chain lock? 1436 */ 1437 ochain = NULL; 1438 if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) { 1439 error = hammer2_hardlink_find(dip, &chain, &ochain); 1440 if (error) { 1441 kprintf("hammer2: unable to find hardlink\n"); 1442 if (chain) { 1443 hammer2_chain_unlock(hmp, chain); 1444 chain = NULL; 1445 } 1446 goto failed; 1447 } 1448 } 1449 1450 /* 1451 * Deconsolidate any hardlink whos nlinks == 1. Ignore errors. 1452 * If an error occurs chain and ip are left alone. 1453 * 1454 * XXX upgrade shared lock? 1455 */ 1456 if (ochain && chain && chain->data->ipdata.nlinks == 1 && !hmp->ronly) { 1457 kprintf("hammer2: need to unconsolidate hardlink for %s\n", 1458 chain->data->ipdata.filename); 1459 /* XXX retain shared lock on dip? (currently not held) */ 1460 hammer2_hardlink_deconsolidate(dip, &chain, &ochain); 1461 } 1462 1463 /* 1464 * Acquire the related vnode 1465 * 1466 * NOTE: For error processing, only ENOENT resolves the namecache 1467 * entry to NULL, otherwise we just return the error and 1468 * leave the namecache unresolved. 1469 * 1470 * NOTE: multiple hammer2_inode structures can be aliased to the 1471 * same chain element, for example for hardlinks. This 1472 * use case does not 'reattach' inode associations that 1473 * might already exist, but always allocates a new one. 1474 */ 1475 if (chain) { 1476 ip = hammer2_inode_get(dip->hmp, dip->pmp, dip, chain); 1477 vp = hammer2_igetv(ip, &error); 1478 if (error == 0) { 1479 vn_unlock(vp); 1480 cache_setvp(ap->a_nch, vp); 1481 } else if (error == ENOENT) { 1482 cache_setvp(ap->a_nch, NULL); 1483 } 1484 /* 1485 * don't break the API, chain is locked shared so unlock 1486 * it separately even though unlock_ex() currently doesn't 1487 * care. 1488 */ 1489 hammer2_inode_unlock_ex(ip, NULL); 1490 hammer2_chain_unlock(hmp, chain); 1491 1492 /* 1493 * The vp should not be released until after we've disposed 1494 * of our locks, because it might cause vop_inactive() to 1495 * be called. 1496 */ 1497 if (vp) 1498 vrele(vp); 1499 } else { 1500 error = ENOENT; 1501 cache_setvp(ap->a_nch, NULL); 1502 } 1503 failed: 1504 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1505 ("resolve error %d/%p chain %p ap %p\n", 1506 error, ap->a_nch->ncp->nc_vp, chain, ap)); 1507 if (ochain) 1508 hammer2_chain_drop(hmp, ochain); 1509 return error; 1510 } 1511 1512 static 1513 int 1514 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1515 { 1516 hammer2_chain_t *chain; 1517 hammer2_inode_t *dip; 1518 hammer2_inode_t *ip; 1519 hammer2_mount_t *hmp; 1520 int error; 1521 1522 dip = VTOI(ap->a_dvp); 1523 hmp = dip->hmp; 1524 1525 if ((ip = dip->pip) == NULL) { 1526 *ap->a_vpp = NULL; 1527 return ENOENT; 1528 } 1529 chain = hammer2_inode_lock_ex(ip); 1530 *ap->a_vpp = hammer2_igetv(ip, &error); 1531 hammer2_inode_unlock_ex(ip, chain); 1532 1533 return error; 1534 } 1535 1536 static 1537 int 1538 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1539 { 1540 hammer2_chain_t *nchain; 1541 hammer2_mount_t *hmp; 1542 hammer2_inode_t *dip; 1543 hammer2_inode_t *nip; 1544 struct namecache *ncp; 1545 const uint8_t *name; 1546 size_t name_len; 1547 int error; 1548 1549 dip = VTOI(ap->a_dvp); 1550 hmp = dip->hmp; 1551 if (hmp->ronly) 1552 return (EROFS); 1553 1554 ncp = ap->a_nch->ncp; 1555 name = ncp->nc_name; 1556 name_len = ncp->nc_nlen; 1557 1558 error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred, 1559 name, name_len, &nip, &nchain); 1560 if (error) { 1561 KKASSERT(nip == NULL); 1562 *ap->a_vpp = NULL; 1563 return error; 1564 } 1565 *ap->a_vpp = hammer2_igetv(nip, &error); 1566 hammer2_inode_unlock_ex(nip, nchain); 1567 1568 if (error == 0) { 1569 cache_setunresolved(ap->a_nch); 1570 cache_setvp(ap->a_nch, *ap->a_vpp); 1571 } 1572 return error; 1573 } 1574 1575 /* 1576 * Return the largest contiguous physical disk range for the logical 1577 * request. 1578 * 1579 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1580 */ 1581 static 1582 int 1583 hammer2_vop_bmap(struct vop_bmap_args *ap) 1584 { 1585 struct vnode *vp; 1586 hammer2_mount_t *hmp; 1587 hammer2_inode_t *ip; 1588 hammer2_chain_t *parent; 1589 hammer2_chain_t *chain; 1590 hammer2_key_t lbeg; 1591 hammer2_key_t lend; 1592 hammer2_off_t pbeg; 1593 hammer2_off_t pbytes; 1594 hammer2_off_t array[HAMMER2_BMAP_COUNT][2]; 1595 int loff; 1596 int ai; 1597 1598 /* 1599 * Only supported on regular files 1600 * 1601 * Only supported for read operations (required for cluster_read). 1602 * The block allocation is delayed for write operations. 1603 */ 1604 vp = ap->a_vp; 1605 if (vp->v_type != VREG) 1606 return (EOPNOTSUPP); 1607 if (ap->a_cmd != BUF_CMD_READ) 1608 return (EOPNOTSUPP); 1609 1610 ip = VTOI(vp); 1611 hmp = ip->hmp; 1612 bzero(array, sizeof(array)); 1613 1614 /* 1615 * Calculate logical range 1616 */ 1617 KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0); 1618 lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI; 1619 lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1; 1620 if (lend < lbeg) 1621 lend = lbeg; 1622 loff = ap->a_loffset & HAMMER2_OFF_MASK_LO; 1623 1624 parent = hammer2_inode_lock_sh(ip); 1625 chain = hammer2_chain_lookup(hmp, &parent, 1626 lbeg, lend, 1627 HAMMER2_LOOKUP_NODATA | 1628 HAMMER2_LOOKUP_SHARED); 1629 if (chain == NULL) { 1630 *ap->a_doffsetp = ZFOFFSET; 1631 hammer2_inode_unlock_sh(ip, parent); 1632 return (0); 1633 } 1634 1635 while (chain) { 1636 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 1637 ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE; 1638 KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT); 1639 array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK; 1640 array[ai][1] = chain->bytes; 1641 } 1642 chain = hammer2_chain_next(hmp, &parent, chain, 1643 lbeg, lend, 1644 HAMMER2_LOOKUP_NODATA | 1645 HAMMER2_LOOKUP_SHARED); 1646 } 1647 hammer2_inode_unlock_sh(ip, parent); 1648 1649 /* 1650 * If the requested loffset is not mappable physically we can't 1651 * bmap. The caller will have to access the file data via a 1652 * device buffer. 1653 */ 1654 if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_LBUFSIZE) { 1655 *ap->a_doffsetp = NOOFFSET; 1656 return (0); 1657 } 1658 1659 /* 1660 * Calculate the physical disk offset range for array[0] 1661 */ 1662 pbeg = array[0][0] + loff; 1663 pbytes = array[0][1] - loff; 1664 1665 for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) { 1666 if (array[ai][0] != pbeg + pbytes) 1667 break; 1668 pbytes += array[ai][1]; 1669 } 1670 1671 *ap->a_doffsetp = pbeg; 1672 if (ap->a_runp) 1673 *ap->a_runp = pbytes; 1674 return (0); 1675 } 1676 1677 static 1678 int 1679 hammer2_vop_open(struct vop_open_args *ap) 1680 { 1681 return vop_stdopen(ap); 1682 } 1683 1684 /* 1685 * hammer2_vop_advlock { vp, id, op, fl, flags } 1686 */ 1687 static 1688 int 1689 hammer2_vop_advlock(struct vop_advlock_args *ap) 1690 { 1691 hammer2_inode_t *ip = VTOI(ap->a_vp); 1692 hammer2_chain_t *chain; 1693 hammer2_off_t size; 1694 1695 chain = hammer2_inode_lock_sh(ip); 1696 size = chain->data->ipdata.size; 1697 hammer2_inode_unlock_sh(ip, chain); 1698 return (lf_advlock(ap, &ip->advlock, size)); 1699 } 1700 1701 1702 static 1703 int 1704 hammer2_vop_close(struct vop_close_args *ap) 1705 { 1706 return vop_stdclose(ap); 1707 } 1708 1709 /* 1710 * hammer2_vop_nlink { nch, dvp, vp, cred } 1711 * 1712 * Create a hardlink from (vp) to {dvp, nch}. 1713 */ 1714 static 1715 int 1716 hammer2_vop_nlink(struct vop_nlink_args *ap) 1717 { 1718 hammer2_inode_t *dip; /* target directory to create link in */ 1719 hammer2_inode_t *ip; /* inode we are hardlinking to */ 1720 hammer2_mount_t *hmp; 1721 hammer2_chain_t *chain; 1722 struct namecache *ncp; 1723 const uint8_t *name; 1724 size_t name_len; 1725 int error; 1726 1727 dip = VTOI(ap->a_dvp); 1728 hmp = dip->hmp; 1729 if (hmp->ronly) 1730 return (EROFS); 1731 1732 ncp = ap->a_nch->ncp; 1733 name = ncp->nc_name; 1734 name_len = ncp->nc_nlen; 1735 1736 /* 1737 * ip represents the file being hardlinked. The file could be a 1738 * normal file or a hardlink target if it has already been hardlinked. 1739 * If ip is a hardlinked target then ip->pip represents the location 1740 * of the hardlinked target, NOT the location of the hardlink pointer. 1741 * 1742 * Bump nlinks and potentially also create or move the hardlink 1743 * target in the parent directory common to (ip) and (dip). The 1744 * consolidation code can modify ip->chain and ip->pip. The 1745 * returned chain is locked. 1746 */ 1747 ip = VTOI(ap->a_vp); 1748 hammer2_inode_ref(ip); 1749 error = hammer2_hardlink_consolidate(ip, &chain, dip, 1); 1750 if (error) 1751 goto done; 1752 1753 /* 1754 * Create a directory entry connected to the specified chain. 1755 * This function unlocks and NULL's chain on return. 1756 */ 1757 error = hammer2_inode_connect(dip, &chain, name, name_len); 1758 if (error == 0) { 1759 cache_setunresolved(ap->a_nch); 1760 cache_setvp(ap->a_nch, ap->a_vp); 1761 } 1762 done: 1763 hammer2_inode_drop(ip); 1764 return error; 1765 } 1766 1767 /* 1768 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap } 1769 * 1770 * The operating system has already ensured that the directory entry 1771 * does not exist and done all appropriate namespace locking. 1772 */ 1773 static 1774 int 1775 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1776 { 1777 hammer2_mount_t *hmp; 1778 hammer2_inode_t *dip; 1779 hammer2_inode_t *nip; 1780 hammer2_chain_t *nchain; 1781 struct namecache *ncp; 1782 const uint8_t *name; 1783 size_t name_len; 1784 int error; 1785 1786 dip = VTOI(ap->a_dvp); 1787 hmp = dip->hmp; 1788 if (hmp->ronly) 1789 return (EROFS); 1790 1791 ncp = ap->a_nch->ncp; 1792 name = ncp->nc_name; 1793 name_len = ncp->nc_nlen; 1794 1795 error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred, 1796 name, name_len, &nip, &nchain); 1797 if (error) { 1798 KKASSERT(nip == NULL); 1799 *ap->a_vpp = NULL; 1800 return error; 1801 } 1802 *ap->a_vpp = hammer2_igetv(nip, &error); 1803 hammer2_inode_unlock_ex(nip, nchain); 1804 1805 if (error == 0) { 1806 cache_setunresolved(ap->a_nch); 1807 cache_setvp(ap->a_nch, *ap->a_vpp); 1808 } 1809 return error; 1810 } 1811 1812 /* 1813 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1814 */ 1815 static 1816 int 1817 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1818 { 1819 hammer2_mount_t *hmp; 1820 hammer2_inode_t *dip; 1821 hammer2_inode_t *nip; 1822 hammer2_chain_t *nchain; 1823 struct namecache *ncp; 1824 const uint8_t *name; 1825 size_t name_len; 1826 int error; 1827 1828 dip = VTOI(ap->a_dvp); 1829 hmp = dip->hmp; 1830 if (hmp->ronly) 1831 return (EROFS); 1832 1833 ncp = ap->a_nch->ncp; 1834 name = ncp->nc_name; 1835 name_len = ncp->nc_nlen; 1836 1837 ap->a_vap->va_type = VLNK; /* enforce type */ 1838 1839 error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred, 1840 name, name_len, &nip, &nchain); 1841 if (error) { 1842 KKASSERT(nip == NULL); 1843 *ap->a_vpp = NULL; 1844 return error; 1845 } 1846 *ap->a_vpp = hammer2_igetv(nip, &error); 1847 1848 /* 1849 * Build the softlink (~like file data) and finalize the namecache. 1850 */ 1851 if (error == 0) { 1852 size_t bytes; 1853 struct uio auio; 1854 struct iovec aiov; 1855 hammer2_inode_data_t *nipdata; 1856 1857 nipdata = &nchain->data->ipdata; 1858 bytes = strlen(ap->a_target); 1859 1860 if (bytes <= HAMMER2_EMBEDDED_BYTES) { 1861 KKASSERT(nipdata->op_flags & 1862 HAMMER2_OPFLAG_DIRECTDATA); 1863 bcopy(ap->a_target, nipdata->u.data, bytes); 1864 nipdata->size = bytes; 1865 } else { 1866 bzero(&auio, sizeof(auio)); 1867 bzero(&aiov, sizeof(aiov)); 1868 auio.uio_iov = &aiov; 1869 auio.uio_segflg = UIO_SYSSPACE; 1870 auio.uio_rw = UIO_WRITE; 1871 auio.uio_resid = bytes; 1872 auio.uio_iovcnt = 1; 1873 auio.uio_td = curthread; 1874 aiov.iov_base = ap->a_target; 1875 aiov.iov_len = bytes; 1876 error = hammer2_write_file(nip, &nchain, 1877 &auio, IO_APPEND, 0); 1878 /* XXX handle error */ 1879 error = 0; 1880 } 1881 } 1882 hammer2_inode_unlock_ex(nip, nchain); 1883 1884 /* 1885 * Finalize namecache 1886 */ 1887 if (error == 0) { 1888 cache_setunresolved(ap->a_nch); 1889 cache_setvp(ap->a_nch, *ap->a_vpp); 1890 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */ 1891 } 1892 return error; 1893 } 1894 1895 /* 1896 * hammer2_vop_nremove { nch, dvp, cred } 1897 */ 1898 static 1899 int 1900 hammer2_vop_nremove(struct vop_nremove_args *ap) 1901 { 1902 hammer2_inode_t *dip; 1903 hammer2_mount_t *hmp; 1904 struct namecache *ncp; 1905 const uint8_t *name; 1906 size_t name_len; 1907 int error; 1908 1909 dip = VTOI(ap->a_dvp); 1910 hmp = dip->hmp; 1911 if (hmp->ronly) 1912 return(EROFS); 1913 1914 ncp = ap->a_nch->ncp; 1915 name = ncp->nc_name; 1916 name_len = ncp->nc_nlen; 1917 1918 error = hammer2_unlink_file(dip, name, name_len, 0, NULL); 1919 if (error == 0) { 1920 cache_unlink(ap->a_nch); 1921 } 1922 return (error); 1923 } 1924 1925 /* 1926 * hammer2_vop_nrmdir { nch, dvp, cred } 1927 */ 1928 static 1929 int 1930 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) 1931 { 1932 hammer2_inode_t *dip; 1933 hammer2_mount_t *hmp; 1934 struct namecache *ncp; 1935 const uint8_t *name; 1936 size_t name_len; 1937 int error; 1938 1939 dip = VTOI(ap->a_dvp); 1940 hmp = dip->hmp; 1941 if (hmp->ronly) 1942 return(EROFS); 1943 1944 ncp = ap->a_nch->ncp; 1945 name = ncp->nc_name; 1946 name_len = ncp->nc_nlen; 1947 1948 error = hammer2_unlink_file(dip, name, name_len, 1, NULL); 1949 if (error == 0) { 1950 cache_unlink(ap->a_nch); 1951 } 1952 return (error); 1953 } 1954 1955 /* 1956 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1957 */ 1958 static 1959 int 1960 hammer2_vop_nrename(struct vop_nrename_args *ap) 1961 { 1962 struct namecache *fncp; 1963 struct namecache *tncp; 1964 hammer2_inode_t *fdip; 1965 hammer2_inode_t *tdip; 1966 hammer2_inode_t *ip; 1967 hammer2_chain_t *chain; 1968 hammer2_mount_t *hmp; 1969 const uint8_t *fname; 1970 size_t fname_len; 1971 const uint8_t *tname; 1972 size_t tname_len; 1973 int error; 1974 1975 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 1976 return(EXDEV); 1977 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount) 1978 return(EXDEV); 1979 1980 fdip = VTOI(ap->a_fdvp); /* source directory */ 1981 tdip = VTOI(ap->a_tdvp); /* target directory */ 1982 1983 hmp = fdip->hmp; /* check read-only filesystem */ 1984 if (hmp->ronly) 1985 return(EROFS); 1986 1987 fncp = ap->a_fnch->ncp; /* entry name in source */ 1988 fname = fncp->nc_name; 1989 fname_len = fncp->nc_nlen; 1990 1991 tncp = ap->a_tnch->ncp; /* entry name in target */ 1992 tname = tncp->nc_name; 1993 tname_len = tncp->nc_nlen; 1994 1995 /* 1996 * ip is the inode being removed. If this is a hardlink then 1997 * ip represents the actual file and not the hardlink marker. 1998 */ 1999 ip = VTOI(fncp->nc_vp); 2000 chain = NULL; 2001 2002 /* 2003 * Keep a tight grip on the inode so the temporary unlinking from 2004 * the source location prior to linking to the target location 2005 * does not cause the chain to be destroyed. 2006 * 2007 * NOTE: To avoid deadlocks we cannot lock (ip) while we are 2008 * unlinking elements from their directories. Locking 2009 * the nlinks field does not lock the whole inode. 2010 */ 2011 hammer2_inode_ref(ip); 2012 2013 /* 2014 * Remove target if it exists 2015 */ 2016 error = hammer2_unlink_file(tdip, tname, tname_len, -1, NULL); 2017 if (error && error != ENOENT) 2018 goto done; 2019 cache_setunresolved(ap->a_tnch); 2020 2021 /* 2022 * When renaming a hardlinked file we may have to re-consolidate 2023 * the location of the hardlink target. Since the element is simply 2024 * being moved, nlinks is not modified in this case. 2025 * 2026 * If ip represents a regular file the consolidation code essentially 2027 * does nothing other than return the locked chain. 2028 * 2029 * The returned chain will be locked. 2030 */ 2031 error = hammer2_hardlink_consolidate(ip, &chain, tdip, 0); 2032 if (error) 2033 goto done; 2034 2035 /* 2036 * Disconnect (fdip, fname) from the source directory. This will 2037 * disconnect (ip) if it represents a direct file. If (ip) represents 2038 * a hardlink the HARDLINK pointer object will be removed but the 2039 * hardlink will stay intact. 2040 * 2041 * NOTE! Because we are retaining (ip) the unlink can fail with 2042 * an EAGAIN. 2043 */ 2044 for (;;) { 2045 error = hammer2_unlink_file(fdip, fname, fname_len, -1, chain); 2046 if (error != EAGAIN) 2047 break; 2048 kprintf("hammer2_vop_nrename: unlink race %s\n", fname); 2049 tsleep(fdip, 0, "h2renr", 1); 2050 } 2051 if (error) 2052 goto done; 2053 2054 /* 2055 * Reconnect ip to target directory. 2056 * 2057 * WARNING: chain locks can lock buffer cache buffers, to avoid 2058 * deadlocks we want to unlock before issuing a cache_*() 2059 * op (that might have to lock a vnode). The *_connect() 2060 * function does this for us. 2061 */ 2062 error = hammer2_inode_connect(tdip, &chain, tname, tname_len); 2063 if (error == 0) 2064 cache_rename(ap->a_fnch, ap->a_tnch); 2065 done: 2066 if (chain) 2067 hammer2_chain_unlock(hmp, chain); 2068 hammer2_inode_drop(ip); 2069 2070 return (error); 2071 } 2072 2073 static int hammer2_strategy_read(struct vop_strategy_args *ap); 2074 static int hammer2_strategy_write(struct vop_strategy_args *ap); 2075 2076 static 2077 int 2078 hammer2_vop_strategy(struct vop_strategy_args *ap) 2079 { 2080 struct bio *biop; 2081 struct buf *bp; 2082 int error; 2083 2084 biop = ap->a_bio; 2085 bp = biop->bio_buf; 2086 2087 switch(bp->b_cmd) { 2088 case BUF_CMD_READ: 2089 error = hammer2_strategy_read(ap); 2090 ++hammer2_iod_file_read; 2091 break; 2092 case BUF_CMD_WRITE: 2093 error = hammer2_strategy_write(ap); 2094 ++hammer2_iod_file_write; 2095 break; 2096 default: 2097 bp->b_error = error = EINVAL; 2098 bp->b_flags |= B_ERROR; 2099 biodone(biop); 2100 break; 2101 } 2102 2103 return (error); 2104 } 2105 2106 static 2107 int 2108 hammer2_strategy_read(struct vop_strategy_args *ap) 2109 { 2110 struct buf *bp; 2111 struct bio *bio; 2112 struct bio *nbio; 2113 hammer2_mount_t *hmp; 2114 hammer2_inode_t *ip; 2115 hammer2_chain_t *parent; 2116 hammer2_chain_t *chain; 2117 hammer2_key_t lbase; 2118 2119 bio = ap->a_bio; 2120 bp = bio->bio_buf; 2121 ip = VTOI(ap->a_vp); 2122 hmp = ip->hmp; 2123 nbio = push_bio(bio); 2124 2125 lbase = bio->bio_offset; 2126 chain = NULL; 2127 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0); 2128 2129 /* 2130 * We must characterize the logical->physical translation if it 2131 * has not already been cached. 2132 * 2133 * Physical data references < LBUFSIZE are never cached. This 2134 * includes both small-block allocations and inode-embedded data. 2135 */ 2136 if (nbio->bio_offset == NOOFFSET) { 2137 parent = hammer2_inode_lock_sh(ip); 2138 2139 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase, 2140 HAMMER2_LOOKUP_NODATA | 2141 HAMMER2_LOOKUP_SHARED); 2142 if (chain == NULL) { 2143 /* 2144 * Data is zero-fill 2145 */ 2146 nbio->bio_offset = ZFOFFSET; 2147 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 2148 /* 2149 * Data is embedded in the inode (do nothing) 2150 */ 2151 KKASSERT(chain == parent); 2152 hammer2_chain_unlock(hmp, chain); 2153 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 2154 /* 2155 * Data is on-media 2156 */ 2157 KKASSERT(bp->b_bcount == chain->bytes); 2158 nbio->bio_offset = chain->bref.data_off & 2159 HAMMER2_OFF_MASK; 2160 hammer2_chain_unlock(hmp, chain); 2161 KKASSERT(nbio->bio_offset != 0); 2162 } else { 2163 panic("hammer2_strategy_read: unknown bref type"); 2164 } 2165 hammer2_inode_unlock_sh(ip, parent); 2166 } 2167 2168 if (hammer2_debug & 0x0020) { 2169 kprintf("read %016jx %016jx\n", 2170 bio->bio_offset, nbio->bio_offset); 2171 } 2172 2173 if (nbio->bio_offset == ZFOFFSET) { 2174 /* 2175 * Data is zero-fill 2176 */ 2177 bp->b_resid = 0; 2178 bp->b_error = 0; 2179 bzero(bp->b_data, bp->b_bcount); 2180 biodone(nbio); 2181 } else if (nbio->bio_offset != NOOFFSET) { 2182 /* 2183 * Forward direct IO to the device 2184 */ 2185 vn_strategy(hmp->devvp, nbio); 2186 } else { 2187 /* 2188 * Data is embedded in inode. 2189 */ 2190 bcopy(chain->data->ipdata.u.data, bp->b_data, 2191 HAMMER2_EMBEDDED_BYTES); 2192 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES, 2193 bp->b_bcount - HAMMER2_EMBEDDED_BYTES); 2194 bp->b_resid = 0; 2195 bp->b_error = 0; 2196 biodone(nbio); 2197 } 2198 return (0); 2199 } 2200 2201 static 2202 int 2203 hammer2_strategy_write(struct vop_strategy_args *ap) 2204 { 2205 struct buf *bp; 2206 struct bio *bio; 2207 struct bio *nbio; 2208 hammer2_mount_t *hmp; 2209 hammer2_inode_t *ip; 2210 2211 bio = ap->a_bio; 2212 bp = bio->bio_buf; 2213 ip = VTOI(ap->a_vp); 2214 hmp = ip->hmp; 2215 nbio = push_bio(bio); 2216 2217 KKASSERT((bio->bio_offset & HAMMER2_PBUFMASK64) == 0); 2218 KKASSERT(nbio->bio_offset != 0 && nbio->bio_offset != ZFOFFSET); 2219 2220 if (nbio->bio_offset == NOOFFSET) { 2221 /* 2222 * Must be embedded in the inode. 2223 * 2224 * Because the inode is dirty, the chain must exist whether 2225 * the inode is locked or not. XXX 2226 */ 2227 KKASSERT(bio->bio_offset == 0); 2228 KKASSERT(ip->chain && ip->chain->data); 2229 bcopy(bp->b_data, ip->chain->data->ipdata.u.data, 2230 HAMMER2_EMBEDDED_BYTES); 2231 bp->b_resid = 0; 2232 bp->b_error = 0; 2233 biodone(nbio); 2234 2235 /* 2236 * This special flag does not follow the normal MODIFY rules 2237 * because we might deadlock on ip. Instead we depend on 2238 * VOP_FSYNC() to detect the case. 2239 */ 2240 atomic_set_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED); 2241 } else { 2242 /* 2243 * Forward direct IO to the device 2244 */ 2245 vn_strategy(hmp->devvp, nbio); 2246 } 2247 return (0); 2248 } 2249 2250 /* 2251 * hammer2_vop_ioctl { vp, command, data, fflag, cred } 2252 */ 2253 static 2254 int 2255 hammer2_vop_ioctl(struct vop_ioctl_args *ap) 2256 { 2257 hammer2_mount_t *hmp; 2258 hammer2_inode_t *ip; 2259 int error; 2260 2261 ip = VTOI(ap->a_vp); 2262 hmp = ip->hmp; 2263 2264 error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data, 2265 ap->a_fflag, ap->a_cred); 2266 return (error); 2267 } 2268 2269 static 2270 int 2271 hammer2_vop_mountctl(struct vop_mountctl_args *ap) 2272 { 2273 struct mount *mp; 2274 hammer2_pfsmount_t *pmp; 2275 int rc; 2276 2277 switch (ap->a_op) { 2278 case (MOUNTCTL_SET_EXPORT): 2279 mp = ap->a_head.a_ops->head.vv_mount; 2280 pmp = MPTOPMP(mp); 2281 2282 if (ap->a_ctllen != sizeof(struct export_args)) 2283 rc = (EINVAL); 2284 else 2285 rc = vfs_export(mp, &pmp->export, 2286 (const struct export_args *)ap->a_ctl); 2287 break; 2288 default: 2289 rc = vop_stdmountctl(ap); 2290 break; 2291 } 2292 return (rc); 2293 } 2294 2295 struct vop_ops hammer2_vnode_vops = { 2296 .vop_default = vop_defaultop, 2297 .vop_fsync = hammer2_vop_fsync, 2298 .vop_getpages = vop_stdgetpages, 2299 .vop_putpages = vop_stdputpages, 2300 .vop_access = hammer2_vop_access, 2301 .vop_advlock = hammer2_vop_advlock, 2302 .vop_close = hammer2_vop_close, 2303 .vop_nlink = hammer2_vop_nlink, 2304 .vop_ncreate = hammer2_vop_ncreate, 2305 .vop_nsymlink = hammer2_vop_nsymlink, 2306 .vop_nremove = hammer2_vop_nremove, 2307 .vop_nrmdir = hammer2_vop_nrmdir, 2308 .vop_nrename = hammer2_vop_nrename, 2309 .vop_getattr = hammer2_vop_getattr, 2310 .vop_setattr = hammer2_vop_setattr, 2311 .vop_readdir = hammer2_vop_readdir, 2312 .vop_readlink = hammer2_vop_readlink, 2313 .vop_getpages = vop_stdgetpages, 2314 .vop_putpages = vop_stdputpages, 2315 .vop_read = hammer2_vop_read, 2316 .vop_write = hammer2_vop_write, 2317 .vop_open = hammer2_vop_open, 2318 .vop_inactive = hammer2_vop_inactive, 2319 .vop_reclaim = hammer2_vop_reclaim, 2320 .vop_nresolve = hammer2_vop_nresolve, 2321 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, 2322 .vop_nmkdir = hammer2_vop_nmkdir, 2323 .vop_ioctl = hammer2_vop_ioctl, 2324 .vop_mountctl = hammer2_vop_mountctl, 2325 .vop_bmap = hammer2_vop_bmap, 2326 .vop_strategy = hammer2_vop_strategy, 2327 }; 2328 2329 struct vop_ops hammer2_spec_vops = { 2330 2331 }; 2332 2333 struct vop_ops hammer2_fifo_vops = { 2334 2335 }; 2336