1 /* 2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/fcntl.h> 39 #include <sys/buf.h> 40 #include <sys/proc.h> 41 #include <sys/namei.h> 42 #include <sys/mount.h> 43 #include <sys/vnode.h> 44 #include <sys/mountctl.h> 45 #include <sys/dirent.h> 46 #include <sys/uio.h> 47 48 #include "hammer2.h" 49 50 #define ZFOFFSET (-2LL) 51 52 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, 53 int seqcount); 54 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 55 int ioflag, int seqcount); 56 static hammer2_off_t hammer2_assign_physical(hammer2_trans_t *trans, 57 hammer2_inode_t *ip, 58 hammer2_key_t lbase, int lblksize, 59 int *errorp); 60 static void hammer2_extend_file(hammer2_trans_t *trans, 61 hammer2_inode_t *ip, hammer2_key_t nsize); 62 static void hammer2_truncate_file(hammer2_trans_t *trans, 63 hammer2_inode_t *ip, hammer2_key_t nsize); 64 65 static __inline 66 void 67 hammer2_knote(struct vnode *vp, int flags) 68 { 69 if (flags) 70 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 71 } 72 73 /* 74 * Last reference to a vnode is going away but it is still cached. 75 */ 76 static 77 int 78 hammer2_vop_inactive(struct vop_inactive_args *ap) 79 { 80 hammer2_inode_t *ip; 81 hammer2_trans_t trans; 82 struct vnode *vp; 83 #if 0 84 struct hammer2_mount *hmp; 85 #endif 86 87 vp = ap->a_vp; 88 ip = VTOI(vp); 89 90 /* 91 * Degenerate case 92 */ 93 if (ip == NULL) { 94 vrecycle(vp); 95 return (0); 96 } 97 98 /* 99 * Detect updates to the embedded data which may be synchronized by 100 * the strategy code. Simply mark the inode modified so it gets 101 * picked up by our normal flush. 102 */ 103 hammer2_inode_lock_ex(ip); 104 KKASSERT(ip->chain); 105 if (ip->flags & HAMMER2_INODE_DIRTYEMBED) { 106 atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED); 107 hammer2_trans_init(&trans, ip->hmp); 108 hammer2_chain_modify(&trans, ip->chain, 0); 109 hammer2_trans_done(&trans); 110 } 111 112 /* 113 * Check for deleted inodes and recycle immediately. 114 */ 115 if (ip->chain->flags & HAMMER2_CHAIN_DELETED) { 116 hammer2_inode_unlock_ex(ip); 117 vrecycle(vp); 118 } else { 119 hammer2_inode_unlock_ex(ip); 120 } 121 return (0); 122 } 123 124 /* 125 * Reclaim a vnode so that it can be reused; after the inode is 126 * disassociated, the filesystem must manage it alone. 127 */ 128 static 129 int 130 hammer2_vop_reclaim(struct vop_reclaim_args *ap) 131 { 132 hammer2_chain_t *chain; 133 hammer2_inode_t *ip; 134 hammer2_mount_t *hmp; 135 hammer2_trans_t trans; 136 struct vnode *vp; 137 138 vp = ap->a_vp; 139 ip = VTOI(vp); 140 if (ip == NULL) 141 return(0); 142 hmp = ip->hmp; 143 144 /* 145 * Set SUBMODIFIED so we can detect and propagate the DESTROYED 146 * bit in the flush code. 147 */ 148 hammer2_inode_lock_ex(ip); 149 chain = ip->chain; 150 vp->v_data = NULL; 151 ip->vp = NULL; 152 if (chain->flags & HAMMER2_CHAIN_DELETED) { 153 KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED); 154 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROYED | 155 HAMMER2_CHAIN_SUBMODIFIED); 156 } 157 if (chain->flags & (HAMMER2_CHAIN_MODIFIED | 158 HAMMER2_CHAIN_DELETED | 159 HAMMER2_CHAIN_SUBMODIFIED)) { 160 hammer2_trans_init(&trans, ip->hmp); 161 hammer2_chain_flush(&trans, chain); 162 hammer2_trans_done(&trans); 163 } 164 if (ip->refs > 2) /* (our lock + vp ref) */ 165 hammer2_inode_unlock_ex(ip); /* unlock */ 166 else 167 hammer2_inode_put(ip); /* unlock & disconnect */ 168 /* chain no longer referenced */ 169 /* chain = NULL; not needed */ 170 hammer2_inode_drop(ip); /* vp ref */ 171 172 /* 173 * XXX handle background sync when ip dirty, kernel will no longer 174 * notify us regarding this inode because there is no longer a 175 * vnode attached to it. 176 */ 177 178 return (0); 179 } 180 181 static 182 int 183 hammer2_vop_fsync(struct vop_fsync_args *ap) 184 { 185 hammer2_inode_t *ip; 186 hammer2_trans_t trans; 187 struct vnode *vp; 188 189 vp = ap->a_vp; 190 ip = VTOI(vp); 191 192 hammer2_trans_init(&trans, ip->hmp); 193 hammer2_inode_lock_ex(ip); 194 195 vfsync(vp, ap->a_waitfor, 1, NULL, NULL); 196 197 /* 198 * Detect updates to the embedded data which may be synchronized by 199 * the strategy code. Simply mark the inode modified so it gets 200 * picked up by our normal flush. 201 */ 202 if (ip->flags & HAMMER2_INODE_DIRTYEMBED) { 203 atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED); 204 hammer2_chain_modify(&trans, ip->chain, 0); 205 } 206 207 /* 208 * Calling chain_flush here creates a lot of duplicative 209 * COW operations due to non-optimal vnode ordering. 210 * 211 * Only do it for an actual fsync() syscall. The other forms 212 * which call this function will eventually call chain_flush 213 * on the volume root as a catch-all, which is far more optimal. 214 */ 215 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); 216 if (ap->a_flags & VOP_FSYNC_SYSCALL) 217 hammer2_chain_flush(&trans, ip->chain); 218 hammer2_inode_unlock_ex(ip); 219 hammer2_trans_done(&trans); 220 return (0); 221 } 222 223 static 224 int 225 hammer2_vop_access(struct vop_access_args *ap) 226 { 227 hammer2_inode_t *ip = VTOI(ap->a_vp); 228 hammer2_inode_data_t *ipdata; 229 uid_t uid; 230 gid_t gid; 231 int error; 232 233 hammer2_inode_lock_sh(ip); 234 ipdata = &ip->chain->data->ipdata; 235 uid = hammer2_to_unix_xid(&ipdata->uid); 236 gid = hammer2_to_unix_xid(&ipdata->gid); 237 error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags); 238 hammer2_inode_unlock_sh(ip); 239 240 return (error); 241 } 242 243 static 244 int 245 hammer2_vop_getattr(struct vop_getattr_args *ap) 246 { 247 hammer2_inode_data_t *ipdata; 248 hammer2_pfsmount_t *pmp; 249 hammer2_inode_t *ip; 250 struct vnode *vp; 251 struct vattr *vap; 252 253 vp = ap->a_vp; 254 vap = ap->a_vap; 255 256 ip = VTOI(vp); 257 pmp = ip->pmp; 258 259 hammer2_inode_lock_sh(ip); 260 ipdata = &ip->chain->data->ipdata; 261 262 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; 263 vap->va_fileid = ipdata->inum; 264 vap->va_mode = ipdata->mode; 265 vap->va_nlink = ipdata->nlinks; 266 vap->va_uid = hammer2_to_unix_xid(&ipdata->uid); 267 vap->va_gid = hammer2_to_unix_xid(&ipdata->gid); 268 vap->va_rmajor = 0; 269 vap->va_rminor = 0; 270 vap->va_size = ipdata->size; 271 vap->va_blocksize = HAMMER2_PBUFSIZE; 272 vap->va_flags = ipdata->uflags; 273 hammer2_time_to_timespec(ipdata->ctime, &vap->va_ctime); 274 hammer2_time_to_timespec(ipdata->mtime, &vap->va_mtime); 275 hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime); 276 vap->va_gen = 1; 277 vap->va_bytes = vap->va_size; /* XXX */ 278 vap->va_type = hammer2_get_vtype(ip->chain); 279 vap->va_filerev = 0; 280 vap->va_uid_uuid = ipdata->uid; 281 vap->va_gid_uuid = ipdata->gid; 282 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 283 VA_FSID_UUID_VALID; 284 285 hammer2_inode_unlock_sh(ip); 286 287 return (0); 288 } 289 290 static 291 int 292 hammer2_vop_setattr(struct vop_setattr_args *ap) 293 { 294 hammer2_inode_data_t *ipdata; 295 hammer2_inode_t *ip; 296 hammer2_mount_t *hmp; 297 hammer2_trans_t trans; 298 struct vnode *vp; 299 struct vattr *vap; 300 int error; 301 int kflags = 0; 302 int domtime = 0; 303 uint64_t ctime; 304 305 vp = ap->a_vp; 306 vap = ap->a_vap; 307 hammer2_update_time(&ctime); 308 309 ip = VTOI(vp); 310 hmp = ip->hmp; 311 312 if (hmp->ronly) 313 return(EROFS); 314 315 hammer2_trans_init(&trans, hmp); 316 hammer2_inode_lock_ex(ip); 317 ipdata = &ip->chain->data->ipdata; 318 error = 0; 319 320 if (vap->va_flags != VNOVAL) { 321 u_int32_t flags; 322 323 flags = ipdata->uflags; 324 error = vop_helper_setattr_flags(&flags, vap->va_flags, 325 hammer2_to_unix_xid(&ipdata->uid), 326 ap->a_cred); 327 if (error == 0) { 328 if (ipdata->uflags != flags) { 329 hammer2_chain_modify(&trans, ip->chain, 0); 330 ipdata->uflags = flags; 331 ipdata->ctime = ctime; 332 kflags |= NOTE_ATTRIB; 333 } 334 if (ipdata->uflags & (IMMUTABLE | APPEND)) { 335 error = 0; 336 goto done; 337 } 338 } 339 goto done; 340 } 341 if (ipdata->uflags & (IMMUTABLE | APPEND)) { 342 error = EPERM; 343 goto done; 344 } 345 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 346 mode_t cur_mode = ipdata->mode; 347 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 348 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 349 uuid_t uuid_uid; 350 uuid_t uuid_gid; 351 352 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 353 ap->a_cred, 354 &cur_uid, &cur_gid, &cur_mode); 355 if (error == 0) { 356 hammer2_guid_to_uuid(&uuid_uid, cur_uid); 357 hammer2_guid_to_uuid(&uuid_gid, cur_gid); 358 if (bcmp(&uuid_uid, &ipdata->uid, sizeof(uuid_uid)) || 359 bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) || 360 ipdata->mode != cur_mode 361 ) { 362 hammer2_chain_modify(&trans, ip->chain, 0); 363 ipdata->uid = uuid_uid; 364 ipdata->gid = uuid_gid; 365 ipdata->mode = cur_mode; 366 ipdata->ctime = ctime; 367 } 368 kflags |= NOTE_ATTRIB; 369 } 370 } 371 372 /* 373 * Resize the file 374 */ 375 if (vap->va_size != VNOVAL && ipdata->size != vap->va_size) { 376 switch(vp->v_type) { 377 case VREG: 378 if (vap->va_size == ipdata->size) 379 break; 380 if (vap->va_size < ipdata->size) { 381 hammer2_truncate_file(&trans, ip, vap->va_size); 382 } else { 383 hammer2_extend_file(&trans, ip, vap->va_size); 384 } 385 domtime = 1; 386 break; 387 default: 388 error = EINVAL; 389 goto done; 390 } 391 } 392 #if 0 393 /* atime not supported */ 394 if (vap->va_atime.tv_sec != VNOVAL) { 395 hammer2_chain_modify(&trans, ip->chain, 0); 396 ipdata->atime = hammer2_timespec_to_time(&vap->va_atime); 397 kflags |= NOTE_ATTRIB; 398 } 399 #endif 400 if (vap->va_mtime.tv_sec != VNOVAL) { 401 hammer2_chain_modify(&trans, ip->chain, 0); 402 ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime); 403 kflags |= NOTE_ATTRIB; 404 } 405 if (vap->va_mode != (mode_t)VNOVAL) { 406 mode_t cur_mode = ipdata->mode; 407 uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid); 408 gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid); 409 410 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 411 cur_uid, cur_gid, &cur_mode); 412 if (error == 0 && ipdata->mode != cur_mode) { 413 hammer2_chain_modify(&trans, ip->chain, 0); 414 ipdata->mode = cur_mode; 415 ipdata->ctime = ctime; 416 kflags |= NOTE_ATTRIB; 417 } 418 } 419 done: 420 hammer2_inode_unlock_ex(ip); 421 hammer2_trans_done(&trans); 422 return (error); 423 } 424 425 static 426 int 427 hammer2_vop_readdir(struct vop_readdir_args *ap) 428 { 429 hammer2_inode_data_t *ipdata; 430 hammer2_mount_t *hmp; 431 hammer2_inode_t *ip; 432 hammer2_inode_t *xip; 433 hammer2_chain_t *parent; 434 hammer2_chain_t *chain; 435 hammer2_tid_t inum; 436 hammer2_key_t lkey; 437 struct uio *uio; 438 off_t *cookies; 439 off_t saveoff; 440 int cookie_index; 441 int ncookies; 442 int error; 443 int dtype; 444 int r; 445 446 ip = VTOI(ap->a_vp); 447 hmp = ip->hmp; 448 uio = ap->a_uio; 449 saveoff = uio->uio_offset; 450 451 /* 452 * Setup cookies directory entry cookies if requested 453 */ 454 if (ap->a_ncookies) { 455 ncookies = uio->uio_resid / 16 + 1; 456 if (ncookies > 1024) 457 ncookies = 1024; 458 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 459 } else { 460 ncookies = -1; 461 cookies = NULL; 462 } 463 cookie_index = 0; 464 465 hammer2_inode_lock_sh(ip); 466 ipdata = &ip->chain->data->ipdata; 467 468 /* 469 * Handle artificial entries. To ensure that only positive 64 bit 470 * quantities are returned to userland we always strip off bit 63. 471 * The hash code is designed such that codes 0x0000-0x7FFF are not 472 * used, allowing us to use these codes for articial entries. 473 * 474 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not 475 * allow '..' to cross the mount point into (e.g.) the super-root. 476 */ 477 error = 0; 478 chain = (void *)(intptr_t)-1; /* non-NULL for early goto done case */ 479 480 if (saveoff == 0) { 481 inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK; 482 r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, "."); 483 if (r) 484 goto done; 485 if (cookies) 486 cookies[cookie_index] = saveoff; 487 ++saveoff; 488 ++cookie_index; 489 if (cookie_index == ncookies) 490 goto done; 491 } 492 493 if (saveoff == 1) { 494 /* 495 * Be careful with lockorder when accessing ".." 496 * 497 * (ip is the current dir. xip is the parent dir). 498 */ 499 inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK; 500 while (ip->pip != NULL && ip != ip->pmp->iroot) { 501 xip = ip->pip; 502 hammer2_inode_ref(xip); 503 hammer2_inode_unlock_sh(ip); 504 hammer2_inode_lock_sh(xip); 505 hammer2_inode_lock_sh(ip); 506 hammer2_inode_drop(xip); 507 if (xip == ip->pip) { 508 inum = xip->chain->data->ipdata.inum & 509 HAMMER2_DIRHASH_USERMSK; 510 hammer2_inode_unlock_sh(xip); 511 break; 512 } 513 hammer2_inode_unlock_sh(xip); 514 } 515 r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, ".."); 516 if (r) 517 goto done; 518 if (cookies) 519 cookies[cookie_index] = saveoff; 520 ++saveoff; 521 ++cookie_index; 522 if (cookie_index == ncookies) 523 goto done; 524 } 525 526 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE; 527 528 /* 529 * parent is the inode chain, already locked for us. Don't 530 * double lock shared locks as this will screw up upgrades. 531 */ 532 if (error) { 533 goto done; 534 } 535 parent = hammer2_chain_lookup_init(ip->chain, HAMMER2_LOOKUP_SHARED); 536 chain = hammer2_chain_lookup(&parent, lkey, lkey, 537 HAMMER2_LOOKUP_SHARED); 538 if (chain == NULL) { 539 chain = hammer2_chain_lookup(&parent, 540 lkey, (hammer2_key_t)-1, 541 HAMMER2_LOOKUP_SHARED); 542 } 543 while (chain) { 544 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 545 dtype = hammer2_get_dtype(chain); 546 saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK; 547 r = vop_write_dirent(&error, uio, 548 chain->data->ipdata.inum & 549 HAMMER2_DIRHASH_USERMSK, 550 dtype, 551 chain->data->ipdata.name_len, 552 chain->data->ipdata.filename); 553 if (r) 554 break; 555 if (cookies) 556 cookies[cookie_index] = saveoff; 557 ++cookie_index; 558 } else { 559 /* XXX chain error */ 560 kprintf("bad chain type readdir %d\n", 561 chain->bref.type); 562 } 563 564 /* 565 * Keys may not be returned in order so once we have a 566 * placemarker (chain) the scan must allow the full range 567 * or some entries will be missed. 568 */ 569 chain = hammer2_chain_next(&parent, chain, 570 HAMMER2_DIRHASH_VISIBLE, 571 (hammer2_key_t)-1, 572 HAMMER2_LOOKUP_SHARED); 573 if (chain) { 574 saveoff = (chain->bref.key & 575 HAMMER2_DIRHASH_USERMSK) + 1; 576 } else { 577 saveoff = (hammer2_key_t)-1; 578 } 579 if (cookie_index == ncookies) 580 break; 581 } 582 if (chain) 583 hammer2_chain_unlock(chain); 584 hammer2_chain_lookup_done(parent); 585 done: 586 hammer2_inode_unlock_sh(ip); 587 if (ap->a_eofflag) 588 *ap->a_eofflag = (chain == NULL); 589 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; 590 if (error && cookie_index == 0) { 591 if (cookies) { 592 kfree(cookies, M_TEMP); 593 *ap->a_ncookies = 0; 594 *ap->a_cookies = NULL; 595 } 596 } else { 597 if (cookies) { 598 *ap->a_ncookies = cookie_index; 599 *ap->a_cookies = cookies; 600 } 601 } 602 return (error); 603 } 604 605 /* 606 * hammer2_vop_readlink { vp, uio, cred } 607 */ 608 static 609 int 610 hammer2_vop_readlink(struct vop_readlink_args *ap) 611 { 612 struct vnode *vp; 613 hammer2_mount_t *hmp; 614 hammer2_inode_t *ip; 615 int error; 616 617 vp = ap->a_vp; 618 if (vp->v_type != VLNK) 619 return (EINVAL); 620 ip = VTOI(vp); 621 hmp = ip->hmp; 622 623 error = hammer2_read_file(ip, ap->a_uio, 0); 624 return (error); 625 } 626 627 static 628 int 629 hammer2_vop_read(struct vop_read_args *ap) 630 { 631 struct vnode *vp; 632 hammer2_mount_t *hmp; 633 hammer2_inode_t *ip; 634 struct uio *uio; 635 int error; 636 int seqcount; 637 int bigread; 638 639 /* 640 * Read operations supported on this vnode? 641 */ 642 vp = ap->a_vp; 643 if (vp->v_type != VREG) 644 return (EINVAL); 645 646 /* 647 * Misc 648 */ 649 ip = VTOI(vp); 650 hmp = ip->hmp; 651 uio = ap->a_uio; 652 error = 0; 653 654 seqcount = ap->a_ioflag >> 16; 655 bigread = (uio->uio_resid > 100 * 1024 * 1024); 656 657 error = hammer2_read_file(ip, uio, seqcount); 658 return (error); 659 } 660 661 static 662 int 663 hammer2_vop_write(struct vop_write_args *ap) 664 { 665 hammer2_mount_t *hmp; 666 hammer2_inode_t *ip; 667 thread_t td; 668 struct vnode *vp; 669 struct uio *uio; 670 int error; 671 int seqcount; 672 int bigwrite; 673 674 /* 675 * Read operations supported on this vnode? 676 */ 677 vp = ap->a_vp; 678 if (vp->v_type != VREG) 679 return (EINVAL); 680 681 /* 682 * Misc 683 */ 684 ip = VTOI(vp); 685 hmp = ip->hmp; 686 uio = ap->a_uio; 687 error = 0; 688 if (hmp->ronly) 689 return (EROFS); 690 691 seqcount = ap->a_ioflag >> 16; 692 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 693 694 /* 695 * Check resource limit 696 */ 697 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc && 698 uio->uio_offset + uio->uio_resid > 699 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 700 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ); 701 return (EFBIG); 702 } 703 704 bigwrite = (uio->uio_resid > 100 * 1024 * 1024); 705 706 /* 707 * ip must be locked if extending the file. 708 * ip must be locked to avoid racing a truncation. 709 * 710 * ip must be marked modified, particularly because the write 711 * might wind up being copied into the embedded data area. 712 */ 713 hammer2_inode_lock_ex(ip); 714 error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount); 715 hammer2_inode_unlock_ex(ip); 716 return (error); 717 } 718 719 /* 720 * Perform read operations on a file or symlink given an UNLOCKED 721 * inode and uio. 722 * 723 * The passed ip is not locked. 724 */ 725 static 726 int 727 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount) 728 { 729 hammer2_off_t size; 730 struct buf *bp; 731 int error; 732 733 error = 0; 734 735 /* 736 * UIO read loop. 737 */ 738 hammer2_inode_lock_sh(ip); 739 size = ip->chain->data->ipdata.size; 740 741 while (uio->uio_resid > 0 && uio->uio_offset < size) { 742 hammer2_key_t lbase; 743 hammer2_key_t leof; 744 int lblksize; 745 int loff; 746 int n; 747 748 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 749 &lbase, &leof); 750 751 error = cluster_read(ip->vp, leof, lbase, lblksize, 752 uio->uio_resid, seqcount * BKVASIZE, 753 &bp); 754 755 if (error) 756 break; 757 loff = (int)(uio->uio_offset - lbase); 758 n = lblksize - loff; 759 if (n > uio->uio_resid) 760 n = uio->uio_resid; 761 if (n > size - uio->uio_offset) 762 n = (int)(size - uio->uio_offset); 763 bp->b_flags |= B_AGE; 764 uiomove((char *)bp->b_data + loff, n, uio); 765 bqrelse(bp); 766 } 767 hammer2_inode_unlock_sh(ip); 768 return (error); 769 } 770 771 /* 772 * Called with a locked (ip) to do the underlying write to a file or 773 * to build the symlink target. 774 */ 775 static 776 int 777 hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, 778 int ioflag, int seqcount) 779 { 780 hammer2_trans_t trans; 781 hammer2_inode_data_t *ipdata; 782 hammer2_key_t old_eof; 783 struct buf *bp; 784 int kflags; 785 int error; 786 int modified = 0; 787 788 /* 789 * Setup if append 790 */ 791 ipdata = &ip->chain->data->ipdata; 792 if (ioflag & IO_APPEND) 793 uio->uio_offset = ipdata->size; 794 kflags = 0; 795 error = 0; 796 797 hammer2_trans_init(&trans, ip->hmp); 798 799 /* 800 * Extend the file if necessary. If the write fails at some point 801 * we will truncate it back down to cover as much as we were able 802 * to write. 803 * 804 * Doing this now makes it easier to calculate buffer sizes in 805 * the loop. 806 */ 807 old_eof = ipdata->size; 808 if (uio->uio_offset + uio->uio_resid > ipdata->size) { 809 modified = 1; 810 hammer2_extend_file(&trans, ip, 811 uio->uio_offset + uio->uio_resid); 812 kflags |= NOTE_EXTEND; 813 } 814 815 /* 816 * UIO write loop 817 */ 818 while (uio->uio_resid > 0) { 819 hammer2_key_t lbase; 820 hammer2_key_t leof; 821 int trivial; 822 int lblksize; 823 int loff; 824 int n; 825 826 /* 827 * Don't allow the buffer build to blow out the buffer 828 * cache. 829 */ 830 if ((ioflag & IO_RECURSE) == 0) { 831 /* 832 * XXX should try to leave this unlocked through 833 * the whole loop 834 */ 835 hammer2_inode_unlock_ex(ip); 836 bwillwrite(HAMMER2_PBUFSIZE); 837 hammer2_inode_lock_ex(ip); 838 ipdata = &ip->chain->data->ipdata; /* reload */ 839 } 840 841 /* XXX bigwrite & signal check test */ 842 843 /* 844 * This nominally tells us how much we can cluster and 845 * what the logical buffer size needs to be. Currently 846 * we don't try to cluster the write and just handle one 847 * block at a time. 848 */ 849 lblksize = hammer2_calc_logical(ip, uio->uio_offset, 850 &lbase, &leof); 851 loff = (int)(uio->uio_offset - lbase); 852 853 /* 854 * Calculate bytes to copy this transfer and whether the 855 * copy completely covers the buffer or not. 856 */ 857 trivial = 0; 858 n = lblksize - loff; 859 if (n > uio->uio_resid) { 860 n = uio->uio_resid; 861 if (uio->uio_offset + n == ipdata->size) 862 trivial = 1; 863 } else if (loff == 0) { 864 trivial = 1; 865 } 866 867 /* 868 * Get the buffer 869 */ 870 if (uio->uio_segflg == UIO_NOCOPY) { 871 /* 872 * Issuing a write with the same data backing the 873 * buffer. Instantiate the buffer to collect the 874 * backing vm pages, then read-in any missing bits. 875 * 876 * This case is used by vop_stdputpages(). 877 */ 878 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 879 if ((bp->b_flags & B_CACHE) == 0) { 880 bqrelse(bp); 881 error = bread(ip->vp, lbase, lblksize, &bp); 882 } 883 } else if (trivial) { 884 /* 885 * Even though we are entirely overwriting the buffer 886 * we may still have to zero it out to avoid a 887 * mmap/write visibility issue. 888 */ 889 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0); 890 if ((bp->b_flags & B_CACHE) == 0) 891 vfs_bio_clrbuf(bp); 892 } else { 893 /* 894 * Partial overwrite, read in any missing bits then 895 * replace the portion being written. 896 * 897 * (The strategy code will detect zero-fill physical 898 * blocks for this case). 899 */ 900 error = bread(ip->vp, lbase, lblksize, &bp); 901 if (error == 0) 902 bheavy(bp); 903 } 904 905 if (error) { 906 brelse(bp); 907 break; 908 } 909 910 /* 911 * We have to assign physical storage to the buffer we intend 912 * to dirty or write now to avoid deadlocks in the strategy 913 * code later. 914 * 915 * This can return NOOFFSET for inode-embedded data. The 916 * strategy code will take care of it in that case. 917 */ 918 bp->b_bio2.bio_offset = 919 hammer2_assign_physical(&trans, ip, 920 lbase, lblksize, &error); 921 if (error) { 922 brelse(bp); 923 break; 924 } 925 926 /* 927 * Ok, copy the data in 928 */ 929 hammer2_inode_unlock_ex(ip); 930 error = uiomove(bp->b_data + loff, n, uio); 931 hammer2_inode_lock_ex(ip); 932 ipdata = &ip->chain->data->ipdata; /* reload */ 933 kflags |= NOTE_WRITE; 934 modified = 1; 935 936 if (error) { 937 brelse(bp); 938 break; 939 } 940 941 /* XXX update ip_data.mtime */ 942 943 /* 944 * Once we dirty a buffer any cached offset becomes invalid. 945 * 946 * NOTE: For cluster_write() always use the trailing block 947 * size, which is HAMMER2_PBUFSIZE. lblksize is the 948 * eof-straddling blocksize and is incorrect. 949 */ 950 bp->b_flags |= B_AGE; 951 if (ioflag & IO_SYNC) { 952 bwrite(bp); 953 } else if ((ioflag & IO_DIRECT) && loff + n == lblksize) { 954 if (bp->b_bcount == HAMMER2_PBUFSIZE) 955 bp->b_flags |= B_CLUSTEROK; 956 bdwrite(bp); 957 } else if (ioflag & IO_ASYNC) { 958 bawrite(bp); 959 } else if (hammer2_cluster_enable) { 960 if (bp->b_bcount == HAMMER2_PBUFSIZE) 961 bp->b_flags |= B_CLUSTEROK; 962 cluster_write(bp, leof, HAMMER2_PBUFSIZE, seqcount); 963 } else { 964 if (bp->b_bcount == HAMMER2_PBUFSIZE) 965 bp->b_flags |= B_CLUSTEROK; 966 bdwrite(bp); 967 } 968 } 969 970 /* 971 * Cleanup. If we extended the file EOF but failed to write through 972 * the entire write is a failure and we have to back-up. 973 */ 974 if (error && ipdata->size != old_eof) { 975 hammer2_truncate_file(&trans, ip, old_eof); 976 } else if (modified) { 977 hammer2_chain_modify(&trans, ip->chain, 0); 978 hammer2_update_time(&ipdata->mtime); 979 } 980 hammer2_knote(ip->vp, kflags); 981 hammer2_trans_done(&trans); 982 return error; 983 } 984 985 /* 986 * Assign physical storage to a logical block. 987 * 988 * NOOFFSET is returned if the data is inode-embedded. In this case the 989 * strategy code will simply bcopy() the data into the inode. 990 * 991 * The inode's delta_dcount is adjusted. 992 */ 993 static 994 hammer2_off_t 995 hammer2_assign_physical(hammer2_trans_t *trans, hammer2_inode_t *ip, 996 hammer2_key_t lbase, int lblksize, int *errorp) 997 { 998 hammer2_mount_t *hmp; 999 hammer2_chain_t *parent; 1000 hammer2_chain_t *chain; 1001 hammer2_off_t pbase; 1002 1003 /* 1004 * Locate the chain associated with lbase, return a locked chain. 1005 * However, do not instantiate any data reference (which utilizes a 1006 * device buffer) because we will be using direct IO via the 1007 * logical buffer cache buffer. 1008 */ 1009 hmp = ip->hmp; 1010 *errorp = 0; 1011 retry: 1012 hammer2_inode_lock_ex(ip); 1013 parent = hammer2_chain_lookup_init(ip->chain, 0); 1014 chain = hammer2_chain_lookup(&parent, 1015 lbase, lbase, 1016 HAMMER2_LOOKUP_NODATA); 1017 1018 if (chain == NULL) { 1019 /* 1020 * We found a hole, create a new chain entry. 1021 * 1022 * NOTE: DATA chains are created without device backing 1023 * store (nor do we want any). 1024 */ 1025 *errorp = hammer2_chain_create(trans, parent, &chain, 1026 lbase, HAMMER2_PBUFRADIX, 1027 HAMMER2_BREF_TYPE_DATA, 1028 lblksize); 1029 if (chain == NULL) { 1030 hammer2_inode_unlock_ex(ip); 1031 hammer2_chain_lookup_done(parent); 1032 panic("hammer2_chain_create: par=%p error=%d\n", 1033 parent, *errorp); 1034 goto retry; 1035 } 1036 1037 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; 1038 /*ip->delta_dcount += lblksize;*/ 1039 } else { 1040 switch (chain->bref.type) { 1041 case HAMMER2_BREF_TYPE_INODE: 1042 /* 1043 * The data is embedded in the inode. The 1044 * caller is responsible for marking the inode 1045 * modified and copying the data to the embedded 1046 * area. 1047 */ 1048 pbase = NOOFFSET; 1049 break; 1050 case HAMMER2_BREF_TYPE_DATA: 1051 if (chain->bytes != lblksize) { 1052 panic("hammer2_assign_physical: " 1053 "size mismatch %d/%d\n", 1054 lblksize, chain->bytes); 1055 } 1056 hammer2_chain_modify(trans, chain, 1057 HAMMER2_MODIFY_OPTDATA); 1058 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; 1059 break; 1060 default: 1061 panic("hammer2_assign_physical: bad type"); 1062 /* NOT REACHED */ 1063 pbase = NOOFFSET; 1064 break; 1065 } 1066 } 1067 if (chain) 1068 hammer2_chain_unlock(chain); 1069 hammer2_chain_lookup_done(parent); 1070 1071 hammer2_inode_unlock_ex(ip); 1072 1073 return (pbase); 1074 } 1075 1076 /* 1077 * Truncate the size of a file. 1078 * 1079 * This routine adjusts ipdata->size smaller, destroying any related 1080 * data beyond the new EOF and potentially resizing the block straddling 1081 * the EOF. 1082 * 1083 * The inode must be locked. 1084 */ 1085 static 1086 void 1087 hammer2_truncate_file(hammer2_trans_t *trans, 1088 hammer2_inode_t *ip, hammer2_key_t nsize) 1089 { 1090 hammer2_inode_data_t *ipdata; 1091 hammer2_chain_t *parent; 1092 hammer2_chain_t *chain; 1093 hammer2_key_t lbase; 1094 hammer2_key_t leof; 1095 struct buf *bp; 1096 int loff; 1097 int error; 1098 int oblksize; 1099 int nblksize; 1100 1101 hammer2_chain_modify(trans, ip->chain, 0); 1102 bp = NULL; 1103 ipdata = &ip->chain->data->ipdata; 1104 error = 0; 1105 1106 /* 1107 * Destroy any logical buffer cache buffers beyond the file EOF. 1108 * 1109 * We call nvtruncbuf() w/ trivial == 1 to prevent it from messing 1110 * around with the buffer straddling EOF, because we need to assign 1111 * a new physical offset to it. 1112 */ 1113 if (ip->vp) { 1114 nvtruncbuf(ip->vp, nsize, 1115 HAMMER2_PBUFSIZE, (int)nsize & HAMMER2_PBUFMASK, 1116 1); 1117 } 1118 1119 /* 1120 * Setup for lookup/search 1121 */ 1122 parent = hammer2_chain_lookup_init(ip->chain, 0); 1123 1124 /* 1125 * Handle the case where a chain/logical-buffer straddles the new 1126 * EOF. We told nvtruncbuf() above not to mess with the logical 1127 * buffer straddling the EOF because we need to reassign its storage 1128 * and can't let the strategy code do it for us. 1129 */ 1130 loff = (int)nsize & HAMMER2_PBUFMASK; 1131 if (loff && ip->vp) { 1132 oblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof); 1133 error = bread(ip->vp, lbase, oblksize, &bp); 1134 KKASSERT(error == 0); 1135 } 1136 ipdata->size = nsize; 1137 nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof); 1138 1139 /* 1140 * Fixup the chain element. If we have a logical buffer in-hand 1141 * we don't want to create a conflicting device buffer. 1142 */ 1143 if (loff && bp) { 1144 chain = hammer2_chain_lookup(&parent, lbase, lbase, 1145 HAMMER2_LOOKUP_NODATA); 1146 if (chain) { 1147 switch(chain->bref.type) { 1148 case HAMMER2_BREF_TYPE_DATA: 1149 hammer2_chain_resize(trans, ip, bp, 1150 parent, &chain, 1151 hammer2_allocsize(nblksize), 1152 HAMMER2_MODIFY_OPTDATA); 1153 allocbuf(bp, nblksize); 1154 bzero(bp->b_data + loff, nblksize - loff); 1155 bp->b_bio2.bio_offset = chain->bref.data_off & 1156 HAMMER2_OFF_MASK; 1157 break; 1158 case HAMMER2_BREF_TYPE_INODE: 1159 allocbuf(bp, nblksize); 1160 bzero(bp->b_data + loff, nblksize - loff); 1161 bp->b_bio2.bio_offset = NOOFFSET; 1162 break; 1163 default: 1164 panic("hammer2_truncate_file: bad type"); 1165 break; 1166 } 1167 hammer2_chain_unlock(chain); 1168 if (bp->b_bcount == HAMMER2_PBUFSIZE) 1169 bp->b_flags |= B_CLUSTEROK; 1170 bdwrite(bp); 1171 } else { 1172 /* 1173 * Destroy clean buffer w/ wrong buffer size. Retain 1174 * backing store. 1175 */ 1176 bp->b_flags |= B_RELBUF; 1177 KKASSERT(bp->b_bio2.bio_offset == NOOFFSET); 1178 KKASSERT((bp->b_flags & B_DIRTY) == 0); 1179 bqrelse(bp); 1180 } 1181 } else if (loff) { 1182 /* 1183 * WARNING: This utilizes a device buffer for the data. 1184 * 1185 * This case should not occur because file truncations without 1186 * a vnode (and hence no logical buffer cache) should only 1187 * always truncate to 0-length. 1188 */ 1189 panic("hammer2_truncate_file: non-zero truncation, no-vnode"); 1190 #if 0 1191 chain = hammer2_chain_lookup(&parent, lbase, lbase, 0); 1192 if (chain) { 1193 switch(chain->bref.type) { 1194 case HAMMER2_BREF_TYPE_DATA: 1195 chain = hammer2_chain_resize(trans, ip, bp, 1196 parent, chain, 1197 hammer2_allocsize(nblksize), 1198 0); 1199 hammer2_chain_modify(hmp, chain, 0); 1200 bzero(chain->data->buf + loff, nblksize - loff); 1201 break; 1202 case HAMMER2_BREF_TYPE_INODE: 1203 if (loff < HAMMER2_EMBEDDED_BYTES) { 1204 hammer2_chain_modify(hmp, chain, 0); 1205 bzero(chain->data->ipdata.u.data + loff, 1206 HAMMER2_EMBEDDED_BYTES - loff); 1207 } 1208 break; 1209 } 1210 hammer2_chain_unlock(chain); 1211 } 1212 #endif 1213 } 1214 1215 /* 1216 * Clean up any fragmentory VM pages now that we have properly 1217 * resized the straddling buffer. These pages are no longer 1218 * part of the buffer. 1219 */ 1220 if (ip->vp) { 1221 nvtruncbuf(ip->vp, nsize, 1222 nblksize, (int)nsize & (nblksize - 1), 1223 1); 1224 } 1225 1226 /* 1227 * Destroy any physical blocks after the new EOF point. 1228 */ 1229 lbase = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64; 1230 chain = hammer2_chain_lookup(&parent, 1231 lbase, (hammer2_key_t)-1, 1232 HAMMER2_LOOKUP_NODATA); 1233 while (chain) { 1234 /* 1235 * Degenerate embedded data case, nothing to loop on. 1236 */ 1237 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 1238 hammer2_chain_unlock(chain); 1239 break; 1240 } 1241 1242 /* 1243 * Delete physical data blocks past the file EOF. 1244 */ 1245 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 1246 /*ip->delta_dcount -= chain->bytes;*/ 1247 hammer2_chain_delete(trans, parent, chain); 1248 } 1249 /* XXX check parent if empty indirect block & delete */ 1250 chain = hammer2_chain_next(&parent, chain, 1251 lbase, (hammer2_key_t)-1, 1252 HAMMER2_LOOKUP_NODATA); 1253 } 1254 hammer2_chain_lookup_done(parent); 1255 } 1256 1257 /* 1258 * Extend the size of a file. The inode must be locked. 1259 * 1260 * We may have to resize the block straddling the old EOF. 1261 */ 1262 static 1263 void 1264 hammer2_extend_file(hammer2_trans_t *trans, 1265 hammer2_inode_t *ip, hammer2_key_t nsize) 1266 { 1267 hammer2_inode_data_t *ipdata; 1268 hammer2_mount_t *hmp; 1269 hammer2_chain_t *parent; 1270 hammer2_chain_t *chain; 1271 struct buf *bp; 1272 hammer2_key_t osize; 1273 hammer2_key_t obase; 1274 hammer2_key_t nbase; 1275 hammer2_key_t leof; 1276 int oblksize; 1277 int nblksize; 1278 int nradix; 1279 int error; 1280 1281 KKASSERT(ip->vp); 1282 hmp = ip->hmp; 1283 1284 hammer2_chain_modify(trans, ip->chain, 0); 1285 ipdata = &ip->chain->data->ipdata; 1286 1287 /* 1288 * Nothing to do if the direct-data case is still intact 1289 */ 1290 if ((ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) && 1291 nsize <= HAMMER2_EMBEDDED_BYTES) { 1292 ipdata->size = nsize; 1293 nvextendbuf(ip->vp, 1294 ipdata->size, nsize, 1295 0, HAMMER2_EMBEDDED_BYTES, 1296 0, (int)nsize, 1297 1); 1298 return; 1299 } 1300 1301 /* 1302 * Calculate the blocksize at the original EOF and resize the block 1303 * if necessary. Adjust the file size in the inode. 1304 */ 1305 osize = ipdata->size; 1306 oblksize = hammer2_calc_logical(ip, osize, &obase, &leof); 1307 ipdata->size = nsize; 1308 nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof); 1309 1310 /* 1311 * Do all required vnode operations, but do not mess with the 1312 * buffer straddling the orignal EOF. 1313 */ 1314 nvextendbuf(ip->vp, 1315 ipdata->size, nsize, 1316 0, nblksize, 1317 0, (int)nsize & HAMMER2_PBUFMASK, 1318 1); 1319 1320 /* 1321 * Early return if we have no more work to do. 1322 */ 1323 if (obase == nbase && oblksize == nblksize && 1324 (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) { 1325 return; 1326 } 1327 1328 /* 1329 * We have work to do, including possibly resizing the buffer 1330 * at the previous EOF point and turning off DIRECTDATA mode. 1331 */ 1332 bp = NULL; 1333 if (((int)osize & HAMMER2_PBUFMASK)) { 1334 error = bread(ip->vp, obase, oblksize, &bp); 1335 KKASSERT(error == 0); 1336 } 1337 1338 /* 1339 * Disable direct-data mode by loading up a buffer cache buffer 1340 * with the data, then converting the inode data area into the 1341 * inode indirect block array area. 1342 */ 1343 if (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) { 1344 ipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA; 1345 bzero(&ipdata->u.blockset, sizeof(ipdata->u.blockset)); 1346 } 1347 1348 /* 1349 * Resize the chain element at the old EOF. 1350 */ 1351 if (((int)osize & HAMMER2_PBUFMASK)) { 1352 retry: 1353 error = 0; 1354 parent = hammer2_chain_lookup_init(ip->chain, 0); 1355 nradix = hammer2_allocsize(nblksize); 1356 1357 chain = hammer2_chain_lookup(&parent, 1358 obase, obase, 1359 HAMMER2_LOOKUP_NODATA); 1360 if (chain == NULL) { 1361 error = hammer2_chain_create(trans, parent, &chain, 1362 obase, nblksize, 1363 HAMMER2_BREF_TYPE_DATA, 1364 nblksize); 1365 if (chain == NULL) { 1366 hammer2_chain_lookup_done(parent); 1367 panic("hammer2_chain_create: par=%p error=%d\n", 1368 parent, error); 1369 goto retry; 1370 } 1371 /*ip->delta_dcount += nblksize;*/ 1372 } else { 1373 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA); 1374 hammer2_chain_resize(trans, ip, bp, 1375 parent, &chain, 1376 nradix, 1377 HAMMER2_MODIFY_OPTDATA); 1378 } 1379 if (obase != nbase) { 1380 if (oblksize != HAMMER2_PBUFSIZE) 1381 allocbuf(bp, HAMMER2_PBUFSIZE); 1382 } else { 1383 if (oblksize != nblksize) 1384 allocbuf(bp, nblksize); 1385 } 1386 bp->b_bio2.bio_offset = chain->bref.data_off & 1387 HAMMER2_OFF_MASK; 1388 hammer2_chain_unlock(chain); 1389 if (bp->b_bcount == HAMMER2_PBUFSIZE) 1390 bp->b_flags |= B_CLUSTEROK; 1391 bdwrite(bp); 1392 hammer2_chain_lookup_done(parent); /* must be after bdwrite */ 1393 } 1394 } 1395 1396 static 1397 int 1398 hammer2_vop_nresolve(struct vop_nresolve_args *ap) 1399 { 1400 hammer2_inode_t *ip; 1401 hammer2_inode_t *dip; 1402 hammer2_mount_t *hmp; 1403 hammer2_chain_t *parent; 1404 hammer2_chain_t *chain; 1405 hammer2_chain_t *ochain; 1406 hammer2_trans_t trans; 1407 struct namecache *ncp; 1408 const uint8_t *name; 1409 size_t name_len; 1410 hammer2_key_t lhc; 1411 int error = 0; 1412 struct vnode *vp; 1413 1414 dip = VTOI(ap->a_dvp); 1415 hmp = dip->hmp; 1416 ncp = ap->a_nch->ncp; 1417 name = ncp->nc_name; 1418 name_len = ncp->nc_nlen; 1419 lhc = hammer2_dirhash(name, name_len); 1420 1421 /* 1422 * Note: In DragonFly the kernel handles '.' and '..'. 1423 */ 1424 hammer2_inode_lock_sh(dip); 1425 parent = hammer2_chain_lookup_init(dip->chain, HAMMER2_LOOKUP_SHARED); 1426 chain = hammer2_chain_lookup(&parent, 1427 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1428 HAMMER2_LOOKUP_SHARED); 1429 while (chain) { 1430 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 1431 name_len == chain->data->ipdata.name_len && 1432 bcmp(name, chain->data->ipdata.filename, name_len) == 0) { 1433 break; 1434 } 1435 chain = hammer2_chain_next(&parent, chain, 1436 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1437 HAMMER2_LOOKUP_SHARED); 1438 } 1439 hammer2_chain_lookup_done(parent); 1440 hammer2_inode_unlock_sh(dip); 1441 1442 /* 1443 * If the inode represents a forwarding entry for a hardlink we have 1444 * to locate the actual inode. The original ip is saved for possible 1445 * deconsolidation. (ip) will only be set to non-NULL when we have 1446 * to locate the real file via a hardlink. ip will be referenced but 1447 * not locked in that situation. chain is passed in locked and 1448 * returned locked. 1449 * 1450 * XXX what kind of chain lock? 1451 */ 1452 ochain = NULL; 1453 if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) { 1454 error = hammer2_hardlink_find(dip, &chain, &ochain); 1455 if (error) { 1456 kprintf("hammer2: unable to find hardlink\n"); 1457 if (chain) { 1458 hammer2_chain_unlock(chain); 1459 chain = NULL; 1460 } 1461 goto failed; 1462 } 1463 } 1464 1465 /* 1466 * Deconsolidate any hardlink whos nlinks == 1. Ignore errors. 1467 * If an error occurs chain and ip are left alone. 1468 * 1469 * XXX upgrade shared lock? 1470 */ 1471 if (ochain && chain && chain->data->ipdata.nlinks == 1 && !hmp->ronly) { 1472 kprintf("hammer2: need to unconsolidate hardlink for %s\n", 1473 chain->data->ipdata.filename); 1474 /* XXX retain shared lock on dip? (currently not held) */ 1475 hammer2_trans_init(&trans, dip->hmp); 1476 hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain); 1477 hammer2_trans_done(&trans); 1478 } 1479 1480 /* 1481 * Acquire the related vnode 1482 * 1483 * NOTE: For error processing, only ENOENT resolves the namecache 1484 * entry to NULL, otherwise we just return the error and 1485 * leave the namecache unresolved. 1486 * 1487 * NOTE: multiple hammer2_inode structures can be aliased to the 1488 * same chain element, for example for hardlinks. This 1489 * use case does not 'reattach' inode associations that 1490 * might already exist, but always allocates a new one. 1491 * 1492 * WARNING: inode structure is locked exclusively via inode_get 1493 * but chain was locked shared. inode_unlock_ex() 1494 * will handle it properly. 1495 */ 1496 if (chain) { 1497 ip = hammer2_inode_get(hmp, dip->pmp, dip, chain); 1498 vp = hammer2_igetv(ip, &error); 1499 if (error == 0) { 1500 vn_unlock(vp); 1501 cache_setvp(ap->a_nch, vp); 1502 } else if (error == ENOENT) { 1503 cache_setvp(ap->a_nch, NULL); 1504 } 1505 hammer2_inode_unlock_ex(ip); 1506 1507 /* 1508 * The vp should not be released until after we've disposed 1509 * of our locks, because it might cause vop_inactive() to 1510 * be called. 1511 */ 1512 if (vp) 1513 vrele(vp); 1514 } else { 1515 error = ENOENT; 1516 cache_setvp(ap->a_nch, NULL); 1517 } 1518 failed: 1519 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, 1520 ("resolve error %d/%p chain %p ap %p\n", 1521 error, ap->a_nch->ncp->nc_vp, chain, ap)); 1522 if (ochain) 1523 hammer2_chain_drop(ochain); 1524 return error; 1525 } 1526 1527 static 1528 int 1529 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 1530 { 1531 hammer2_inode_t *dip; 1532 hammer2_inode_t *ip; 1533 hammer2_mount_t *hmp; 1534 int error; 1535 1536 dip = VTOI(ap->a_dvp); 1537 hmp = dip->hmp; 1538 1539 if ((ip = dip->pip) == NULL) { 1540 *ap->a_vpp = NULL; 1541 return ENOENT; 1542 } 1543 hammer2_inode_lock_ex(ip); 1544 *ap->a_vpp = hammer2_igetv(ip, &error); 1545 hammer2_inode_unlock_ex(ip); 1546 1547 return error; 1548 } 1549 1550 static 1551 int 1552 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) 1553 { 1554 hammer2_mount_t *hmp; 1555 hammer2_inode_t *dip; 1556 hammer2_inode_t *nip; 1557 hammer2_trans_t trans; 1558 struct namecache *ncp; 1559 const uint8_t *name; 1560 size_t name_len; 1561 int error; 1562 1563 dip = VTOI(ap->a_dvp); 1564 hmp = dip->hmp; 1565 if (hmp->ronly) 1566 return (EROFS); 1567 1568 ncp = ap->a_nch->ncp; 1569 name = ncp->nc_name; 1570 name_len = ncp->nc_nlen; 1571 1572 hammer2_trans_init(&trans, hmp); 1573 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1574 name, name_len, &error); 1575 if (error) { 1576 KKASSERT(nip == NULL); 1577 *ap->a_vpp = NULL; 1578 } else { 1579 *ap->a_vpp = hammer2_igetv(nip, &error); 1580 hammer2_inode_unlock_ex(nip); 1581 } 1582 hammer2_trans_done(&trans); 1583 1584 if (error == 0) { 1585 cache_setunresolved(ap->a_nch); 1586 cache_setvp(ap->a_nch, *ap->a_vpp); 1587 } 1588 return error; 1589 } 1590 1591 /* 1592 * Return the largest contiguous physical disk range for the logical 1593 * request. 1594 * 1595 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 1596 */ 1597 static 1598 int 1599 hammer2_vop_bmap(struct vop_bmap_args *ap) 1600 { 1601 struct vnode *vp; 1602 hammer2_mount_t *hmp; 1603 hammer2_inode_t *ip; 1604 hammer2_chain_t *parent; 1605 hammer2_chain_t *chain; 1606 hammer2_key_t lbeg; 1607 hammer2_key_t lend; 1608 hammer2_off_t pbeg; 1609 hammer2_off_t pbytes; 1610 hammer2_off_t array[HAMMER2_BMAP_COUNT][2]; 1611 int loff; 1612 int ai; 1613 1614 /* 1615 * Only supported on regular files 1616 * 1617 * Only supported for read operations (required for cluster_read). 1618 * The block allocation is delayed for write operations. 1619 */ 1620 vp = ap->a_vp; 1621 if (vp->v_type != VREG) 1622 return (EOPNOTSUPP); 1623 if (ap->a_cmd != BUF_CMD_READ) 1624 return (EOPNOTSUPP); 1625 1626 ip = VTOI(vp); 1627 hmp = ip->hmp; 1628 bzero(array, sizeof(array)); 1629 1630 /* 1631 * Calculate logical range 1632 */ 1633 KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0); 1634 lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI; 1635 lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1; 1636 if (lend < lbeg) 1637 lend = lbeg; 1638 loff = ap->a_loffset & HAMMER2_OFF_MASK_LO; 1639 1640 hammer2_inode_lock_sh(ip); 1641 parent = hammer2_chain_lookup_init(ip->chain, HAMMER2_LOOKUP_SHARED); 1642 chain = hammer2_chain_lookup(&parent, 1643 lbeg, lend, 1644 HAMMER2_LOOKUP_NODATA | 1645 HAMMER2_LOOKUP_SHARED); 1646 if (chain == NULL) { 1647 *ap->a_doffsetp = ZFOFFSET; 1648 hammer2_chain_lookup_done(parent); 1649 hammer2_inode_unlock_sh(ip); 1650 return (0); 1651 } 1652 1653 while (chain) { 1654 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 1655 ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE; 1656 KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT); 1657 array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK; 1658 array[ai][1] = chain->bytes; 1659 } 1660 chain = hammer2_chain_next(&parent, chain, 1661 lbeg, lend, 1662 HAMMER2_LOOKUP_NODATA | 1663 HAMMER2_LOOKUP_SHARED); 1664 } 1665 hammer2_chain_lookup_done(parent); 1666 hammer2_inode_unlock_sh(ip); 1667 1668 /* 1669 * If the requested loffset is not mappable physically we can't 1670 * bmap. The caller will have to access the file data via a 1671 * device buffer. 1672 */ 1673 if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_LBUFSIZE) { 1674 *ap->a_doffsetp = NOOFFSET; 1675 return (0); 1676 } 1677 1678 /* 1679 * Calculate the physical disk offset range for array[0] 1680 */ 1681 pbeg = array[0][0] + loff; 1682 pbytes = array[0][1] - loff; 1683 1684 for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) { 1685 if (array[ai][0] != pbeg + pbytes) 1686 break; 1687 pbytes += array[ai][1]; 1688 } 1689 1690 *ap->a_doffsetp = pbeg; 1691 if (ap->a_runp) 1692 *ap->a_runp = pbytes; 1693 return (0); 1694 } 1695 1696 static 1697 int 1698 hammer2_vop_open(struct vop_open_args *ap) 1699 { 1700 return vop_stdopen(ap); 1701 } 1702 1703 /* 1704 * hammer2_vop_advlock { vp, id, op, fl, flags } 1705 */ 1706 static 1707 int 1708 hammer2_vop_advlock(struct vop_advlock_args *ap) 1709 { 1710 hammer2_inode_t *ip = VTOI(ap->a_vp); 1711 hammer2_off_t size; 1712 1713 hammer2_inode_lock_sh(ip); 1714 size = ip->chain->data->ipdata.size; 1715 hammer2_inode_unlock_sh(ip); 1716 return (lf_advlock(ap, &ip->advlock, size)); 1717 } 1718 1719 1720 static 1721 int 1722 hammer2_vop_close(struct vop_close_args *ap) 1723 { 1724 return vop_stdclose(ap); 1725 } 1726 1727 /* 1728 * hammer2_vop_nlink { nch, dvp, vp, cred } 1729 * 1730 * Create a hardlink from (vp) to {dvp, nch}. 1731 */ 1732 static 1733 int 1734 hammer2_vop_nlink(struct vop_nlink_args *ap) 1735 { 1736 hammer2_inode_t *dip; /* target directory to create link in */ 1737 hammer2_inode_t *ip; /* inode we are hardlinking to */ 1738 hammer2_mount_t *hmp; 1739 hammer2_chain_t *chain; 1740 hammer2_trans_t trans; 1741 struct namecache *ncp; 1742 const uint8_t *name; 1743 size_t name_len; 1744 int error; 1745 1746 dip = VTOI(ap->a_dvp); 1747 hmp = dip->hmp; 1748 if (hmp->ronly) 1749 return (EROFS); 1750 1751 ncp = ap->a_nch->ncp; 1752 name = ncp->nc_name; 1753 name_len = ncp->nc_nlen; 1754 hammer2_trans_init(&trans, hmp); 1755 1756 /* 1757 * ip represents the file being hardlinked. The file could be a 1758 * normal file or a hardlink target if it has already been hardlinked. 1759 * If ip is a hardlinked target then ip->pip represents the location 1760 * of the hardlinked target, NOT the location of the hardlink pointer. 1761 * 1762 * Bump nlinks and potentially also create or move the hardlink 1763 * target in the parent directory common to (ip) and (dip). The 1764 * consolidation code can modify ip->chain and ip->pip. The 1765 * returned chain is locked. 1766 */ 1767 ip = VTOI(ap->a_vp); 1768 hammer2_inode_ref(ip); 1769 error = hammer2_hardlink_consolidate(&trans, ip, &chain, dip, 1); 1770 if (error) 1771 goto done; 1772 1773 /* 1774 * Create a directory entry connected to the specified chain. 1775 * This function unlocks and NULL's chain on return. 1776 */ 1777 error = hammer2_inode_connect(&trans, dip, ip, &chain, name, name_len); 1778 if (chain) { 1779 hammer2_chain_unlock(chain); 1780 chain = NULL; 1781 } 1782 if (error == 0) { 1783 cache_setunresolved(ap->a_nch); 1784 cache_setvp(ap->a_nch, ap->a_vp); 1785 } 1786 done: 1787 hammer2_inode_drop(ip); 1788 hammer2_trans_done(&trans); 1789 1790 return error; 1791 } 1792 1793 /* 1794 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap } 1795 * 1796 * The operating system has already ensured that the directory entry 1797 * does not exist and done all appropriate namespace locking. 1798 */ 1799 static 1800 int 1801 hammer2_vop_ncreate(struct vop_ncreate_args *ap) 1802 { 1803 hammer2_mount_t *hmp; 1804 hammer2_inode_t *dip; 1805 hammer2_inode_t *nip; 1806 hammer2_trans_t trans; 1807 struct namecache *ncp; 1808 const uint8_t *name; 1809 size_t name_len; 1810 int error; 1811 1812 dip = VTOI(ap->a_dvp); 1813 hmp = dip->hmp; 1814 if (hmp->ronly) 1815 return (EROFS); 1816 1817 ncp = ap->a_nch->ncp; 1818 name = ncp->nc_name; 1819 name_len = ncp->nc_nlen; 1820 hammer2_trans_init(&trans, hmp); 1821 1822 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1823 name, name_len, &error); 1824 if (error) { 1825 KKASSERT(nip == NULL); 1826 *ap->a_vpp = NULL; 1827 } else { 1828 *ap->a_vpp = hammer2_igetv(nip, &error); 1829 hammer2_inode_unlock_ex(nip); 1830 } 1831 hammer2_trans_done(&trans); 1832 1833 if (error == 0) { 1834 cache_setunresolved(ap->a_nch); 1835 cache_setvp(ap->a_nch, *ap->a_vpp); 1836 } 1837 return error; 1838 } 1839 1840 /* 1841 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1842 */ 1843 static 1844 int 1845 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) 1846 { 1847 hammer2_mount_t *hmp; 1848 hammer2_inode_t *dip; 1849 hammer2_inode_t *nip; 1850 hammer2_trans_t trans; 1851 struct namecache *ncp; 1852 const uint8_t *name; 1853 size_t name_len; 1854 int error; 1855 1856 dip = VTOI(ap->a_dvp); 1857 hmp = dip->hmp; 1858 if (hmp->ronly) 1859 return (EROFS); 1860 1861 ncp = ap->a_nch->ncp; 1862 name = ncp->nc_name; 1863 name_len = ncp->nc_nlen; 1864 hammer2_trans_init(&trans, hmp); 1865 1866 ap->a_vap->va_type = VLNK; /* enforce type */ 1867 1868 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, 1869 name, name_len, &error); 1870 if (error) { 1871 KKASSERT(nip == NULL); 1872 *ap->a_vpp = NULL; 1873 hammer2_trans_done(&trans); 1874 return error; 1875 } 1876 *ap->a_vpp = hammer2_igetv(nip, &error); 1877 1878 /* 1879 * Build the softlink (~like file data) and finalize the namecache. 1880 */ 1881 if (error == 0) { 1882 size_t bytes; 1883 struct uio auio; 1884 struct iovec aiov; 1885 hammer2_inode_data_t *nipdata; 1886 1887 nipdata = &nip->chain->data->ipdata; 1888 bytes = strlen(ap->a_target); 1889 1890 if (bytes <= HAMMER2_EMBEDDED_BYTES) { 1891 KKASSERT(nipdata->op_flags & 1892 HAMMER2_OPFLAG_DIRECTDATA); 1893 bcopy(ap->a_target, nipdata->u.data, bytes); 1894 nipdata->size = bytes; 1895 } else { 1896 bzero(&auio, sizeof(auio)); 1897 bzero(&aiov, sizeof(aiov)); 1898 auio.uio_iov = &aiov; 1899 auio.uio_segflg = UIO_SYSSPACE; 1900 auio.uio_rw = UIO_WRITE; 1901 auio.uio_resid = bytes; 1902 auio.uio_iovcnt = 1; 1903 auio.uio_td = curthread; 1904 aiov.iov_base = ap->a_target; 1905 aiov.iov_len = bytes; 1906 error = hammer2_write_file(nip, &auio, IO_APPEND, 0); 1907 /* XXX handle error */ 1908 error = 0; 1909 } 1910 } 1911 hammer2_inode_unlock_ex(nip); 1912 hammer2_trans_done(&trans); 1913 1914 /* 1915 * Finalize namecache 1916 */ 1917 if (error == 0) { 1918 cache_setunresolved(ap->a_nch); 1919 cache_setvp(ap->a_nch, *ap->a_vpp); 1920 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */ 1921 } 1922 return error; 1923 } 1924 1925 /* 1926 * hammer2_vop_nremove { nch, dvp, cred } 1927 */ 1928 static 1929 int 1930 hammer2_vop_nremove(struct vop_nremove_args *ap) 1931 { 1932 hammer2_inode_t *dip; 1933 hammer2_mount_t *hmp; 1934 hammer2_trans_t trans; 1935 struct namecache *ncp; 1936 const uint8_t *name; 1937 size_t name_len; 1938 int error; 1939 1940 dip = VTOI(ap->a_dvp); 1941 hmp = dip->hmp; 1942 if (hmp->ronly) 1943 return(EROFS); 1944 1945 ncp = ap->a_nch->ncp; 1946 name = ncp->nc_name; 1947 name_len = ncp->nc_nlen; 1948 hammer2_trans_init(&trans, hmp); 1949 error = hammer2_unlink_file(&trans, dip, name, name_len, 0); 1950 hammer2_trans_done(&trans); 1951 if (error == 0) { 1952 cache_unlink(ap->a_nch); 1953 } 1954 return (error); 1955 } 1956 1957 /* 1958 * hammer2_vop_nrmdir { nch, dvp, cred } 1959 */ 1960 static 1961 int 1962 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) 1963 { 1964 hammer2_inode_t *dip; 1965 hammer2_mount_t *hmp; 1966 hammer2_trans_t trans; 1967 struct namecache *ncp; 1968 const uint8_t *name; 1969 size_t name_len; 1970 int error; 1971 1972 dip = VTOI(ap->a_dvp); 1973 hmp = dip->hmp; 1974 if (hmp->ronly) 1975 return(EROFS); 1976 1977 ncp = ap->a_nch->ncp; 1978 name = ncp->nc_name; 1979 name_len = ncp->nc_nlen; 1980 1981 hammer2_trans_init(&trans, hmp); 1982 error = hammer2_unlink_file(&trans, dip, name, name_len, 1); 1983 hammer2_trans_done(&trans); 1984 if (error == 0) { 1985 cache_unlink(ap->a_nch); 1986 } 1987 return (error); 1988 } 1989 1990 /* 1991 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1992 */ 1993 static 1994 int 1995 hammer2_vop_nrename(struct vop_nrename_args *ap) 1996 { 1997 struct namecache *fncp; 1998 struct namecache *tncp; 1999 hammer2_inode_t *fdip; 2000 hammer2_inode_t *tdip; 2001 hammer2_inode_t *ip; 2002 hammer2_chain_t *chain; 2003 hammer2_mount_t *hmp; 2004 hammer2_trans_t trans; 2005 const uint8_t *fname; 2006 size_t fname_len; 2007 const uint8_t *tname; 2008 size_t tname_len; 2009 int error; 2010 2011 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 2012 return(EXDEV); 2013 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount) 2014 return(EXDEV); 2015 2016 fdip = VTOI(ap->a_fdvp); /* source directory */ 2017 tdip = VTOI(ap->a_tdvp); /* target directory */ 2018 2019 hmp = fdip->hmp; /* check read-only filesystem */ 2020 if (hmp->ronly) 2021 return(EROFS); 2022 2023 fncp = ap->a_fnch->ncp; /* entry name in source */ 2024 fname = fncp->nc_name; 2025 fname_len = fncp->nc_nlen; 2026 2027 tncp = ap->a_tnch->ncp; /* entry name in target */ 2028 tname = tncp->nc_name; 2029 tname_len = tncp->nc_nlen; 2030 2031 hammer2_trans_init(&trans, hmp); 2032 2033 /* 2034 * ip is the inode being removed. If this is a hardlink then 2035 * ip represents the actual file and not the hardlink marker. 2036 */ 2037 ip = VTOI(fncp->nc_vp); 2038 chain = NULL; 2039 2040 /* 2041 * Keep a tight grip on the inode so the temporary unlinking from 2042 * the source location prior to linking to the target location 2043 * does not cause the chain to be destroyed. 2044 * 2045 * NOTE: To avoid deadlocks we cannot lock (ip) while we are 2046 * unlinking elements from their directories. Locking 2047 * the nlinks field does not lock the whole inode. 2048 */ 2049 hammer2_inode_ref(ip); 2050 2051 /* 2052 * Remove target if it exists 2053 */ 2054 error = hammer2_unlink_file(&trans, tdip, tname, tname_len, -1); 2055 if (error && error != ENOENT) 2056 goto done; 2057 cache_setunresolved(ap->a_tnch); 2058 2059 /* 2060 * When renaming a hardlinked file we may have to re-consolidate 2061 * the location of the hardlink target. Since the element is simply 2062 * being moved, nlinks is not modified in this case. 2063 * 2064 * If ip represents a regular file the consolidation code essentially 2065 * does nothing other than return the locked chain. 2066 * 2067 * The returned chain will be locked. 2068 */ 2069 error = hammer2_hardlink_consolidate(&trans, ip, &chain, tdip, 0); 2070 if (error) 2071 goto done; 2072 2073 /* 2074 * Disconnect (fdip, fname) from the source directory. This will 2075 * disconnect (ip) if it represents a direct file. If (ip) represents 2076 * a hardlink the HARDLINK pointer object will be removed but the 2077 * hardlink will stay intact. 2078 * 2079 * The target chain may be marked DELETED but will not be destroyed 2080 * since we retain our hold on ip and chain. 2081 */ 2082 error = hammer2_unlink_file(&trans, fdip, fname, fname_len, -1); 2083 KKASSERT(error != EAGAIN); 2084 if (error) 2085 goto done; 2086 2087 /* 2088 * Reconnect ip to target directory using chain. Chains cannot 2089 * actually be moved, so this will duplicate the chain in the new 2090 * spot and assign it to the ip, replacing the old chain. 2091 * 2092 * WARNING: chain locks can lock buffer cache buffers, to avoid 2093 * deadlocks we want to unlock before issuing a cache_*() 2094 * op (that might have to lock a vnode). 2095 */ 2096 error = hammer2_inode_connect(&trans, tdip, 2097 ip, &chain, 2098 tname, tname_len); 2099 if (error == 0) { 2100 if (chain) { 2101 hammer2_chain_unlock(chain); 2102 chain = NULL; 2103 } 2104 cache_rename(ap->a_fnch, ap->a_tnch); 2105 } 2106 done: 2107 if (chain) 2108 hammer2_chain_unlock(chain); 2109 hammer2_inode_drop(ip); 2110 hammer2_trans_done(&trans); 2111 2112 return (error); 2113 } 2114 2115 static int hammer2_strategy_read(struct vop_strategy_args *ap); 2116 static int hammer2_strategy_write(struct vop_strategy_args *ap); 2117 2118 static 2119 int 2120 hammer2_vop_strategy(struct vop_strategy_args *ap) 2121 { 2122 struct bio *biop; 2123 struct buf *bp; 2124 int error; 2125 2126 biop = ap->a_bio; 2127 bp = biop->bio_buf; 2128 2129 switch(bp->b_cmd) { 2130 case BUF_CMD_READ: 2131 error = hammer2_strategy_read(ap); 2132 ++hammer2_iod_file_read; 2133 break; 2134 case BUF_CMD_WRITE: 2135 error = hammer2_strategy_write(ap); 2136 ++hammer2_iod_file_write; 2137 break; 2138 default: 2139 bp->b_error = error = EINVAL; 2140 bp->b_flags |= B_ERROR; 2141 biodone(biop); 2142 break; 2143 } 2144 2145 return (error); 2146 } 2147 2148 static 2149 int 2150 hammer2_strategy_read(struct vop_strategy_args *ap) 2151 { 2152 struct buf *bp; 2153 struct bio *bio; 2154 struct bio *nbio; 2155 hammer2_mount_t *hmp; 2156 hammer2_inode_t *ip; 2157 hammer2_chain_t *parent; 2158 hammer2_chain_t *chain; 2159 hammer2_key_t lbase; 2160 2161 bio = ap->a_bio; 2162 bp = bio->bio_buf; 2163 ip = VTOI(ap->a_vp); 2164 hmp = ip->hmp; 2165 nbio = push_bio(bio); 2166 2167 lbase = bio->bio_offset; 2168 chain = NULL; 2169 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0); 2170 2171 /* 2172 * We must characterize the logical->physical translation if it 2173 * has not already been cached. 2174 * 2175 * Physical data references < LBUFSIZE are never cached. This 2176 * includes both small-block allocations and inode-embedded data. 2177 */ 2178 if (nbio->bio_offset == NOOFFSET) { 2179 hammer2_inode_lock_sh(ip); 2180 2181 parent = hammer2_chain_lookup_init(ip->chain, 2182 HAMMER2_LOOKUP_SHARED); 2183 2184 chain = hammer2_chain_lookup(&parent, lbase, lbase, 2185 HAMMER2_LOOKUP_NODATA | 2186 HAMMER2_LOOKUP_SHARED); 2187 if (chain == NULL) { 2188 /* 2189 * Data is zero-fill 2190 */ 2191 nbio->bio_offset = ZFOFFSET; 2192 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 2193 /* 2194 * Data is embedded in the inode (do nothing) 2195 */ 2196 KKASSERT(chain == parent); 2197 hammer2_chain_unlock(chain); 2198 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 2199 /* 2200 * Data is on-media 2201 */ 2202 KKASSERT(bp->b_bcount == chain->bytes); 2203 nbio->bio_offset = chain->bref.data_off & 2204 HAMMER2_OFF_MASK; 2205 hammer2_chain_unlock(chain); 2206 KKASSERT(nbio->bio_offset != 0); 2207 } else { 2208 panic("hammer2_strategy_read: unknown bref type"); 2209 } 2210 hammer2_chain_lookup_done(parent); 2211 hammer2_inode_unlock_sh(ip); 2212 } 2213 2214 if (hammer2_debug & 0x0020) { 2215 kprintf("read %016jx %016jx\n", 2216 bio->bio_offset, nbio->bio_offset); 2217 } 2218 2219 if (nbio->bio_offset == ZFOFFSET) { 2220 /* 2221 * Data is zero-fill 2222 */ 2223 bp->b_resid = 0; 2224 bp->b_error = 0; 2225 bzero(bp->b_data, bp->b_bcount); 2226 biodone(nbio); 2227 } else if (nbio->bio_offset != NOOFFSET) { 2228 /* 2229 * Forward direct IO to the device 2230 */ 2231 vn_strategy(hmp->devvp, nbio); 2232 } else { 2233 /* 2234 * Data is embedded in inode. 2235 */ 2236 bcopy(chain->data->ipdata.u.data, bp->b_data, 2237 HAMMER2_EMBEDDED_BYTES); 2238 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES, 2239 bp->b_bcount - HAMMER2_EMBEDDED_BYTES); 2240 bp->b_resid = 0; 2241 bp->b_error = 0; 2242 biodone(nbio); 2243 } 2244 return (0); 2245 } 2246 2247 static 2248 int 2249 hammer2_strategy_write(struct vop_strategy_args *ap) 2250 { 2251 struct buf *bp; 2252 struct bio *bio; 2253 struct bio *nbio; 2254 hammer2_mount_t *hmp; 2255 hammer2_inode_t *ip; 2256 2257 bio = ap->a_bio; 2258 bp = bio->bio_buf; 2259 ip = VTOI(ap->a_vp); 2260 hmp = ip->hmp; 2261 nbio = push_bio(bio); 2262 2263 KKASSERT((bio->bio_offset & HAMMER2_PBUFMASK64) == 0); 2264 KKASSERT(nbio->bio_offset != 0 && nbio->bio_offset != ZFOFFSET); 2265 2266 if (nbio->bio_offset == NOOFFSET) { 2267 /* 2268 * Must be embedded in the inode. 2269 * 2270 * Because the inode is dirty, the chain must exist whether 2271 * the inode is locked or not. XXX 2272 */ 2273 KKASSERT(bio->bio_offset == 0); 2274 KKASSERT(ip->chain && ip->chain->data); 2275 bcopy(bp->b_data, ip->chain->data->ipdata.u.data, 2276 HAMMER2_EMBEDDED_BYTES); 2277 bp->b_resid = 0; 2278 bp->b_error = 0; 2279 biodone(nbio); 2280 2281 /* 2282 * This special flag does not follow the normal MODIFY rules 2283 * because we might deadlock on ip. Instead we depend on 2284 * VOP_FSYNC() to detect the case. 2285 */ 2286 atomic_set_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED); 2287 } else { 2288 /* 2289 * Forward direct IO to the device 2290 */ 2291 vn_strategy(hmp->devvp, nbio); 2292 } 2293 return (0); 2294 } 2295 2296 /* 2297 * hammer2_vop_ioctl { vp, command, data, fflag, cred } 2298 */ 2299 static 2300 int 2301 hammer2_vop_ioctl(struct vop_ioctl_args *ap) 2302 { 2303 hammer2_mount_t *hmp; 2304 hammer2_inode_t *ip; 2305 int error; 2306 2307 ip = VTOI(ap->a_vp); 2308 hmp = ip->hmp; 2309 2310 error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data, 2311 ap->a_fflag, ap->a_cred); 2312 return (error); 2313 } 2314 2315 static 2316 int 2317 hammer2_vop_mountctl(struct vop_mountctl_args *ap) 2318 { 2319 struct mount *mp; 2320 hammer2_pfsmount_t *pmp; 2321 int rc; 2322 2323 switch (ap->a_op) { 2324 case (MOUNTCTL_SET_EXPORT): 2325 mp = ap->a_head.a_ops->head.vv_mount; 2326 pmp = MPTOPMP(mp); 2327 2328 if (ap->a_ctllen != sizeof(struct export_args)) 2329 rc = (EINVAL); 2330 else 2331 rc = vfs_export(mp, &pmp->export, 2332 (const struct export_args *)ap->a_ctl); 2333 break; 2334 default: 2335 rc = vop_stdmountctl(ap); 2336 break; 2337 } 2338 return (rc); 2339 } 2340 2341 struct vop_ops hammer2_vnode_vops = { 2342 .vop_default = vop_defaultop, 2343 .vop_fsync = hammer2_vop_fsync, 2344 .vop_getpages = vop_stdgetpages, 2345 .vop_putpages = vop_stdputpages, 2346 .vop_access = hammer2_vop_access, 2347 .vop_advlock = hammer2_vop_advlock, 2348 .vop_close = hammer2_vop_close, 2349 .vop_nlink = hammer2_vop_nlink, 2350 .vop_ncreate = hammer2_vop_ncreate, 2351 .vop_nsymlink = hammer2_vop_nsymlink, 2352 .vop_nremove = hammer2_vop_nremove, 2353 .vop_nrmdir = hammer2_vop_nrmdir, 2354 .vop_nrename = hammer2_vop_nrename, 2355 .vop_getattr = hammer2_vop_getattr, 2356 .vop_setattr = hammer2_vop_setattr, 2357 .vop_readdir = hammer2_vop_readdir, 2358 .vop_readlink = hammer2_vop_readlink, 2359 .vop_getpages = vop_stdgetpages, 2360 .vop_putpages = vop_stdputpages, 2361 .vop_read = hammer2_vop_read, 2362 .vop_write = hammer2_vop_write, 2363 .vop_open = hammer2_vop_open, 2364 .vop_inactive = hammer2_vop_inactive, 2365 .vop_reclaim = hammer2_vop_reclaim, 2366 .vop_nresolve = hammer2_vop_nresolve, 2367 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, 2368 .vop_nmkdir = hammer2_vop_nmkdir, 2369 .vop_ioctl = hammer2_vop_ioctl, 2370 .vop_mountctl = hammer2_vop_mountctl, 2371 .vop_bmap = hammer2_vop_bmap, 2372 .vop_strategy = hammer2_vop_strategy, 2373 }; 2374 2375 struct vop_ops hammer2_spec_vops = { 2376 2377 }; 2378 2379 struct vop_ops hammer2_fifo_vops = { 2380 2381 }; 2382