1 /* 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.84 2008/07/09 10:29:20 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/namecache.h>
#include <sys/vnode.h>
#include <sys/lockf.h>
#include <sys/event.h>
#include <sys/stat.h>
#include <sys/dirent.h>
#include <vm/vm_extern.h>
#include <vfs/fifofs/fifo.h>
#include "hammer.h"

/*
 * USERFS VNOPS
 *
 * Forward declarations for the HAMMER vnode operations implemented in
 * this file.  Operations not declared here (readlink, nremove, nrename,
 * nrmdir, setattr, strategy, etc. bodies) appear later in the file.
 */
/*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
static int hammer_vop_fsync(struct vop_fsync_args *);
static int hammer_vop_read(struct vop_read_args *);
static int hammer_vop_write(struct vop_write_args *);
static int hammer_vop_access(struct vop_access_args *);
static int hammer_vop_advlock(struct vop_advlock_args *);
static int hammer_vop_close(struct vop_close_args *);
static int hammer_vop_ncreate(struct vop_ncreate_args *);
static int hammer_vop_getattr(struct vop_getattr_args *);
static int hammer_vop_nresolve(struct vop_nresolve_args *);
static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
static int hammer_vop_nlink(struct vop_nlink_args *);
static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
static int hammer_vop_nmknod(struct vop_nmknod_args *);
static int hammer_vop_open(struct vop_open_args *);
static int hammer_vop_pathconf(struct vop_pathconf_args *);
static int hammer_vop_print(struct vop_print_args *);
static int hammer_vop_readdir(struct vop_readdir_args *);
static int hammer_vop_readlink(struct vop_readlink_args *);
static int hammer_vop_nremove(struct vop_nremove_args *);
static int hammer_vop_nrename(struct vop_nrename_args *);
static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
static int hammer_vop_setattr(struct vop_setattr_args *);
static int hammer_vop_strategy(struct vop_strategy_args *);
static int hammer_vop_bmap(struct vop_bmap_args *ap);
static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
static int hammer_vop_ioctl(struct vop_ioctl_args *);
static int hammer_vop_mountctl(struct vop_mountctl_args *);

/* FIFO-specific wrappers (defined later in the file, not visible here) */
static int hammer_vop_fifoclose (struct vop_close_args *);
static int hammer_vop_fiforead (struct vop_read_args *);
static int hammer_vop_fifowrite (struct vop_write_args *);

/* Special-device (char/block) wrappers (defined later in the file) */
static int hammer_vop_specclose (struct vop_close_args *);
static int hammer_vop_specread (struct vop_read_args *);
static int hammer_vop_specwrite (struct vop_write_args *);

/*
 * Vnode operations vector for regular HAMMER vnodes (files, directories,
 * symlinks).  Unlisted operations fall through to vop_defaultop.
 */
struct vop_ops hammer_vnode_vops = {
	.vop_default =		vop_defaultop,
	.vop_fsync =		hammer_vop_fsync,
	.vop_getpages =		vop_stdgetpages,
	.vop_putpages =		vop_stdputpages,
	.vop_read =		hammer_vop_read,
	.vop_write =		hammer_vop_write,
	.vop_access =		hammer_vop_access,
	.vop_advlock =		hammer_vop_advlock,
	.vop_close =		hammer_vop_close,
	.vop_ncreate =		hammer_vop_ncreate,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_nresolve =		hammer_vop_nresolve,
	.vop_nlookupdotdot =	hammer_vop_nlookupdotdot,
	.vop_nlink =		hammer_vop_nlink,
	.vop_nmkdir =		hammer_vop_nmkdir,
	.vop_nmknod =		hammer_vop_nmknod,
	.vop_open =		hammer_vop_open,
	.vop_pathconf =		hammer_vop_pathconf,
	.vop_print =		hammer_vop_print,
	.vop_readdir =		hammer_vop_readdir,
	.vop_readlink =		hammer_vop_readlink,
	.vop_nremove =		hammer_vop_nremove,
	.vop_nrename =		hammer_vop_nrename,
	.vop_nrmdir =		hammer_vop_nrmdir,
	.vop_setattr =		hammer_vop_setattr,
	.vop_bmap =		hammer_vop_bmap,
	.vop_strategy =		hammer_vop_strategy,
	.vop_nsymlink =		hammer_vop_nsymlink,
	.vop_nwhiteout =	hammer_vop_nwhiteout,
	.vop_ioctl =		hammer_vop_ioctl,
	.vop_mountctl =		hammer_vop_mountctl
};

/*
 * Vnode operations vector for special-device vnodes.  HAMMER keeps
 * attribute/metadata handling, but data I/O routes to the spec layer
 * via the hammer_vop_spec* wrappers and spec_vnoperate.
 */
struct vop_ops hammer_spec_vops = {
	.vop_default =		spec_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_specread,
	.vop_write =		hammer_vop_specwrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_specclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};

/*
 * Vnode operations vector for FIFO vnodes.  Same pattern as the spec
 * vector but routed through the fifofs layer.
 */
struct vop_ops hammer_fifo_vops = {
	.vop_default =		fifo_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_fiforead,
	.vop_write =		hammer_vop_fifowrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_fifoclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};

#ifdef DEBUG_TRUNCATE
/* Debug hook: inode being watched by truncate debugging code. */
struct hammer_inode *HammerTruncIp;
#endif

static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
			   struct vnode *dvp, struct ucred *cred, int flags);
static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
static int hammer_vop_strategy_write(struct vop_strategy_args *ap);

#if 0
static
int
hammer_vop_vnoperate(struct vop_generic_args *)
{
	return (VOCALL(&hammer_vnode_vops, ap));
}
#endif

/*
 * hammer_vop_fsync { vp, waitfor }
 *
 * fsync() an inode to disk and wait for it to be completely committed
 * such that the information would not be undone if a crash occurred after
 * return.
178 */ 179 static 180 int 181 hammer_vop_fsync(struct vop_fsync_args *ap) 182 { 183 hammer_inode_t ip = VTOI(ap->a_vp); 184 185 vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL); 186 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); 187 if (ap->a_waitfor == MNT_WAIT) 188 hammer_wait_inode(ip); 189 return (ip->error); 190 } 191 192 /* 193 * hammer_vop_read { vp, uio, ioflag, cred } 194 */ 195 static 196 int 197 hammer_vop_read(struct vop_read_args *ap) 198 { 199 struct hammer_transaction trans; 200 hammer_inode_t ip; 201 off_t offset; 202 struct buf *bp; 203 struct uio *uio; 204 int error; 205 int n; 206 int seqcount; 207 int ioseqcount; 208 int blksize; 209 210 if (ap->a_vp->v_type != VREG) 211 return (EINVAL); 212 ip = VTOI(ap->a_vp); 213 error = 0; 214 uio = ap->a_uio; 215 216 /* 217 * Allow the UIO's size to override the sequential heuristic. 218 */ 219 blksize = hammer_blocksize(uio->uio_offset); 220 seqcount = (uio->uio_resid + (blksize - 1)) / blksize; 221 ioseqcount = ap->a_ioflag >> 16; 222 if (seqcount < ioseqcount) 223 seqcount = ioseqcount; 224 225 hammer_start_transaction(&trans, ip->hmp); 226 227 /* 228 * Access the data typically in HAMMER_BUFSIZE blocks via the 229 * buffer cache, but HAMMER may use a variable block size based 230 * on the offset. 231 */ 232 while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) { 233 int64_t base_offset; 234 int64_t file_limit; 235 236 blksize = hammer_blocksize(uio->uio_offset); 237 offset = (int)uio->uio_offset & (blksize - 1); 238 base_offset = uio->uio_offset - offset; 239 240 if (hammer_debug_cluster_enable) { 241 /* 242 * Use file_limit to prevent cluster_read() from 243 * creating buffers of the wrong block size past 244 * the demarc. 
245 */ 246 file_limit = ip->ino_data.size; 247 if (base_offset < HAMMER_XDEMARC && 248 file_limit > HAMMER_XDEMARC) { 249 file_limit = HAMMER_XDEMARC; 250 } 251 error = cluster_read(ap->a_vp, 252 file_limit, base_offset, 253 blksize, MAXPHYS, 254 seqcount, &bp); 255 } else { 256 error = bread(ap->a_vp, base_offset, blksize, &bp); 257 } 258 if (error) { 259 kprintf("error %d\n", error); 260 brelse(bp); 261 break; 262 } 263 264 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ 265 n = blksize - offset; 266 if (n > uio->uio_resid) 267 n = uio->uio_resid; 268 if (n > ip->ino_data.size - uio->uio_offset) 269 n = (int)(ip->ino_data.size - uio->uio_offset); 270 error = uiomove((char *)bp->b_data + offset, n, uio); 271 272 /* data has a lower priority then meta-data */ 273 bp->b_flags |= B_AGE; 274 bqrelse(bp); 275 if (error) 276 break; 277 } 278 if ((ip->flags & HAMMER_INODE_RO) == 0 && 279 (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) { 280 ip->ino_data.atime = trans.time; 281 hammer_modify_inode(ip, HAMMER_INODE_ATIME); 282 } 283 hammer_done_transaction(&trans); 284 return (error); 285 } 286 287 /* 288 * hammer_vop_write { vp, uio, ioflag, cred } 289 */ 290 static 291 int 292 hammer_vop_write(struct vop_write_args *ap) 293 { 294 struct hammer_transaction trans; 295 struct hammer_inode *ip; 296 hammer_mount_t hmp; 297 struct uio *uio; 298 int offset; 299 off_t base_offset; 300 struct buf *bp; 301 int error; 302 int n; 303 int flags; 304 int delta; 305 int seqcount; 306 307 if (ap->a_vp->v_type != VREG) 308 return (EINVAL); 309 ip = VTOI(ap->a_vp); 310 hmp = ip->hmp; 311 error = 0; 312 seqcount = ap->a_ioflag >> 16; 313 314 if (ip->flags & HAMMER_INODE_RO) 315 return (EROFS); 316 317 /* 318 * Create a transaction to cover the operations we perform. 
319 */ 320 hammer_start_transaction(&trans, hmp); 321 uio = ap->a_uio; 322 323 /* 324 * Check append mode 325 */ 326 if (ap->a_ioflag & IO_APPEND) 327 uio->uio_offset = ip->ino_data.size; 328 329 /* 330 * Check for illegal write offsets. Valid range is 0...2^63-1. 331 * 332 * NOTE: the base_off assignment is required to work around what 333 * I consider to be a GCC-4 optimization bug. 334 */ 335 if (uio->uio_offset < 0) { 336 hammer_done_transaction(&trans); 337 return (EFBIG); 338 } 339 base_offset = uio->uio_offset + uio->uio_resid; /* work around gcc-4 */ 340 if (uio->uio_resid > 0 && base_offset <= 0) { 341 hammer_done_transaction(&trans); 342 return (EFBIG); 343 } 344 345 /* 346 * Access the data typically in HAMMER_BUFSIZE blocks via the 347 * buffer cache, but HAMMER may use a variable block size based 348 * on the offset. 349 */ 350 while (uio->uio_resid > 0) { 351 int fixsize = 0; 352 int blksize; 353 int blkmask; 354 355 if ((error = hammer_checkspace(hmp, HAMMER_CHECKSPACE_SLOP_WRITE)) != 0) 356 break; 357 358 blksize = hammer_blocksize(uio->uio_offset); 359 360 /* 361 * Do not allow HAMMER to blow out the buffer cache. Very 362 * large UIOs can lockout other processes due to bwillwrite() 363 * mechanics. 364 * 365 * The hammer inode is not locked during these operations. 366 * The vnode is locked which can interfere with the pageout 367 * daemon for non-UIO_NOCOPY writes but should not interfere 368 * with the buffer cache. Even so, we cannot afford to 369 * allow the pageout daemon to build up too many dirty buffer 370 * cache buffers. 371 */ 372 /*if (((int)uio->uio_offset & (blksize - 1)) == 0)*/ 373 bwillwrite(blksize); 374 375 /* 376 * Do not allow HAMMER to blow out system memory by 377 * accumulating too many records. 
Records are so well 378 * decoupled from the buffer cache that it is possible 379 * for userland to push data out to the media via 380 * direct-write, but build up the records queued to the 381 * backend faster then the backend can flush them out. 382 * HAMMER has hit its write limit but the frontend has 383 * no pushback to slow it down. 384 */ 385 if (hmp->rsv_recs > hammer_limit_recs / 2) { 386 /* 387 * Get the inode on the flush list 388 */ 389 if (ip->rsv_recs >= 64) 390 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); 391 else if (ip->rsv_recs >= 16) 392 hammer_flush_inode(ip, 0); 393 394 /* 395 * Keep the flusher going if the system keeps 396 * queueing records. 397 */ 398 delta = hmp->count_newrecords - 399 hmp->last_newrecords; 400 if (delta < 0 || delta > hammer_limit_recs / 2) { 401 hmp->last_newrecords = hmp->count_newrecords; 402 hammer_sync_hmp(hmp, MNT_NOWAIT); 403 } 404 405 /* 406 * If we have gotten behind start slowing 407 * down the writers. 408 */ 409 delta = (hmp->rsv_recs - hammer_limit_recs) * 410 hz / hammer_limit_recs; 411 if (delta > 0) 412 tsleep(&trans, 0, "hmrslo", delta); 413 } 414 415 /* 416 * Calculate the blocksize at the current offset and figure 417 * out how much we can actually write. 418 */ 419 blkmask = blksize - 1; 420 offset = (int)uio->uio_offset & blkmask; 421 base_offset = uio->uio_offset & ~(int64_t)blkmask; 422 n = blksize - offset; 423 if (n > uio->uio_resid) 424 n = uio->uio_resid; 425 if (uio->uio_offset + n > ip->ino_data.size) { 426 vnode_pager_setsize(ap->a_vp, uio->uio_offset + n); 427 fixsize = 1; 428 } 429 430 if (uio->uio_segflg == UIO_NOCOPY) { 431 /* 432 * Issuing a write with the same data backing the 433 * buffer. Instantiate the buffer to collect the 434 * backing vm pages, then read-in any missing bits. 435 * 436 * This case is used by vop_stdputpages(). 
437 */ 438 bp = getblk(ap->a_vp, base_offset, 439 blksize, GETBLK_BHEAVY, 0); 440 if ((bp->b_flags & B_CACHE) == 0) { 441 bqrelse(bp); 442 error = bread(ap->a_vp, base_offset, 443 blksize, &bp); 444 } 445 } else if (offset == 0 && uio->uio_resid >= blksize) { 446 /* 447 * Even though we are entirely overwriting the buffer 448 * we may still have to zero it out to avoid a 449 * mmap/write visibility issue. 450 */ 451 bp = getblk(ap->a_vp, base_offset, blksize, GETBLK_BHEAVY, 0); 452 if ((bp->b_flags & B_CACHE) == 0) 453 vfs_bio_clrbuf(bp); 454 } else if (base_offset >= ip->ino_data.size) { 455 /* 456 * If the base offset of the buffer is beyond the 457 * file EOF, we don't have to issue a read. 458 */ 459 bp = getblk(ap->a_vp, base_offset, 460 blksize, GETBLK_BHEAVY, 0); 461 vfs_bio_clrbuf(bp); 462 } else { 463 /* 464 * Partial overwrite, read in any missing bits then 465 * replace the portion being written. 466 */ 467 error = bread(ap->a_vp, base_offset, blksize, &bp); 468 if (error == 0) 469 bheavy(bp); 470 } 471 if (error == 0) { 472 error = uiomove((char *)bp->b_data + offset, 473 n, uio); 474 } 475 476 /* 477 * If we screwed up we have to undo any VM size changes we 478 * made. 479 */ 480 if (error) { 481 brelse(bp); 482 if (fixsize) { 483 vtruncbuf(ap->a_vp, ip->ino_data.size, 484 hammer_blocksize(ip->ino_data.size)); 485 } 486 break; 487 } 488 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ 489 if (ip->ino_data.size < uio->uio_offset) { 490 ip->ino_data.size = uio->uio_offset; 491 flags = HAMMER_INODE_DDIRTY; 492 vnode_pager_setsize(ap->a_vp, ip->ino_data.size); 493 } else { 494 flags = 0; 495 } 496 ip->ino_data.mtime = trans.time; 497 flags |= HAMMER_INODE_MTIME | HAMMER_INODE_BUFS; 498 hammer_modify_inode(ip, flags); 499 500 /* 501 * Final buffer disposition. 
502 */ 503 bp->b_flags |= B_AGE; 504 if (ap->a_ioflag & IO_SYNC) { 505 bwrite(bp); 506 } else if (ap->a_ioflag & IO_DIRECT) { 507 bawrite(bp); 508 } else { 509 bdwrite(bp); 510 } 511 } 512 hammer_done_transaction(&trans); 513 return (error); 514 } 515 516 /* 517 * hammer_vop_access { vp, mode, cred } 518 */ 519 static 520 int 521 hammer_vop_access(struct vop_access_args *ap) 522 { 523 struct hammer_inode *ip = VTOI(ap->a_vp); 524 uid_t uid; 525 gid_t gid; 526 int error; 527 528 uid = hammer_to_unix_xid(&ip->ino_data.uid); 529 gid = hammer_to_unix_xid(&ip->ino_data.gid); 530 531 error = vop_helper_access(ap, uid, gid, ip->ino_data.mode, 532 ip->ino_data.uflags); 533 return (error); 534 } 535 536 /* 537 * hammer_vop_advlock { vp, id, op, fl, flags } 538 */ 539 static 540 int 541 hammer_vop_advlock(struct vop_advlock_args *ap) 542 { 543 hammer_inode_t ip = VTOI(ap->a_vp); 544 545 return (lf_advlock(ap, &ip->advlock, ip->ino_data.size)); 546 } 547 548 /* 549 * hammer_vop_close { vp, fflag } 550 */ 551 static 552 int 553 hammer_vop_close(struct vop_close_args *ap) 554 { 555 hammer_inode_t ip = VTOI(ap->a_vp); 556 557 if ((ip->flags | ip->sync_flags) & HAMMER_INODE_MODMASK) 558 hammer_inode_waitreclaims(ip->hmp); 559 return (vop_stdclose(ap)); 560 } 561 562 /* 563 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap } 564 * 565 * The operating system has already ensured that the directory entry 566 * does not exist and done all appropriate namespace locking. 567 */ 568 static 569 int 570 hammer_vop_ncreate(struct vop_ncreate_args *ap) 571 { 572 struct hammer_transaction trans; 573 struct hammer_inode *dip; 574 struct hammer_inode *nip; 575 struct nchandle *nch; 576 int error; 577 578 nch = ap->a_nch; 579 dip = VTOI(ap->a_dvp); 580 581 if (dip->flags & HAMMER_INODE_RO) 582 return (EROFS); 583 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_CREATE)) != 0) 584 return (error); 585 586 /* 587 * Create a transaction to cover the operations we perform. 
588 */ 589 hammer_start_transaction(&trans, dip->hmp); 590 591 /* 592 * Create a new filesystem object of the requested type. The 593 * returned inode will be referenced and shared-locked to prevent 594 * it from being moved to the flusher. 595 */ 596 597 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 598 dip, NULL, &nip); 599 if (error) { 600 hkprintf("hammer_create_inode error %d\n", error); 601 hammer_done_transaction(&trans); 602 *ap->a_vpp = NULL; 603 return (error); 604 } 605 606 /* 607 * Add the new filesystem object to the directory. This will also 608 * bump the inode's link count. 609 */ 610 error = hammer_ip_add_directory(&trans, dip, 611 nch->ncp->nc_name, nch->ncp->nc_nlen, 612 nip); 613 if (error) 614 hkprintf("hammer_ip_add_directory error %d\n", error); 615 616 /* 617 * Finish up. 618 */ 619 if (error) { 620 hammer_rel_inode(nip, 0); 621 hammer_done_transaction(&trans); 622 *ap->a_vpp = NULL; 623 } else { 624 error = hammer_get_vnode(nip, ap->a_vpp); 625 hammer_done_transaction(&trans); 626 hammer_rel_inode(nip, 0); 627 if (error == 0) { 628 cache_setunresolved(ap->a_nch); 629 cache_setvp(ap->a_nch, *ap->a_vpp); 630 } 631 } 632 return (error); 633 } 634 635 /* 636 * hammer_vop_getattr { vp, vap } 637 * 638 * Retrieve an inode's attribute information. When accessing inodes 639 * historically we fake the atime field to ensure consistent results. 640 * The atime field is stored in the B-Tree element and allowed to be 641 * updated without cycling the element. 642 */ 643 static 644 int 645 hammer_vop_getattr(struct vop_getattr_args *ap) 646 { 647 struct hammer_inode *ip = VTOI(ap->a_vp); 648 struct vattr *vap = ap->a_vap; 649 650 /* 651 * We want the fsid to be different when accessing a filesystem 652 * with different as-of's so programs like diff don't think 653 * the files are the same. 
654 * 655 * We also want the fsid to be the same when comparing snapshots, 656 * or when comparing mirrors (which might be backed by different 657 * physical devices). HAMMER fsids are based on the PFS's 658 * shared_uuid field. 659 * 660 * XXX there is a chance of collision here. The va_fsid reported 661 * by stat is different from the more involved fsid used in the 662 * mount structure. 663 */ 664 vap->va_fsid = ip->pfsm->fsid_udev ^ (u_int32_t)ip->obj_asof ^ 665 (u_int32_t)(ip->obj_asof >> 32); 666 667 vap->va_fileid = ip->ino_leaf.base.obj_id; 668 vap->va_mode = ip->ino_data.mode; 669 vap->va_nlink = ip->ino_data.nlinks; 670 vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid); 671 vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid); 672 vap->va_rmajor = 0; 673 vap->va_rminor = 0; 674 vap->va_size = ip->ino_data.size; 675 676 /* 677 * We must provide a consistent atime and mtime for snapshots 678 * so people can do a 'tar cf - ... | md5' on them and get 679 * consistent results. 680 */ 681 if (ip->flags & HAMMER_INODE_RO) { 682 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_atime); 683 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_mtime); 684 } else { 685 hammer_time_to_timespec(ip->ino_data.atime, &vap->va_atime); 686 hammer_time_to_timespec(ip->ino_data.mtime, &vap->va_mtime); 687 } 688 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_ctime); 689 vap->va_flags = ip->ino_data.uflags; 690 vap->va_gen = 1; /* hammer inums are unique for all time */ 691 vap->va_blocksize = HAMMER_BUFSIZE; 692 if (ip->ino_data.size >= HAMMER_XDEMARC) { 693 vap->va_bytes = (ip->ino_data.size + HAMMER_XBUFMASK64) & 694 ~HAMMER_XBUFMASK64; 695 } else if (ip->ino_data.size > HAMMER_BUFSIZE / 2) { 696 vap->va_bytes = (ip->ino_data.size + HAMMER_BUFMASK64) & 697 ~HAMMER_BUFMASK64; 698 } else { 699 vap->va_bytes = (ip->ino_data.size + 15) & ~15; 700 } 701 vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type); 702 vap->va_filerev = 0; /* XXX */ 703 /* mtime uniquely 
identifies any adjustments made to the file XXX */ 704 vap->va_fsmid = ip->ino_data.mtime; 705 vap->va_uid_uuid = ip->ino_data.uid; 706 vap->va_gid_uuid = ip->ino_data.gid; 707 vap->va_fsid_uuid = ip->hmp->fsid; 708 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | 709 VA_FSID_UUID_VALID; 710 711 switch (ip->ino_data.obj_type) { 712 case HAMMER_OBJTYPE_CDEV: 713 case HAMMER_OBJTYPE_BDEV: 714 vap->va_rmajor = ip->ino_data.rmajor; 715 vap->va_rminor = ip->ino_data.rminor; 716 break; 717 default: 718 break; 719 } 720 return(0); 721 } 722 723 /* 724 * hammer_vop_nresolve { nch, dvp, cred } 725 * 726 * Locate the requested directory entry. 727 */ 728 static 729 int 730 hammer_vop_nresolve(struct vop_nresolve_args *ap) 731 { 732 struct hammer_transaction trans; 733 struct namecache *ncp; 734 hammer_inode_t dip; 735 hammer_inode_t ip; 736 hammer_tid_t asof; 737 struct hammer_cursor cursor; 738 struct vnode *vp; 739 int64_t namekey; 740 int error; 741 int i; 742 int nlen; 743 int flags; 744 int ispfs; 745 int64_t obj_id; 746 u_int32_t localization; 747 748 /* 749 * Misc initialization, plus handle as-of name extensions. Look for 750 * the '@@' extension. Note that as-of files and directories cannot 751 * be modified. 
752 */ 753 dip = VTOI(ap->a_dvp); 754 ncp = ap->a_nch->ncp; 755 asof = dip->obj_asof; 756 nlen = ncp->nc_nlen; 757 flags = dip->flags & HAMMER_INODE_RO; 758 ispfs = 0; 759 760 hammer_simple_transaction(&trans, dip->hmp); 761 762 for (i = 0; i < nlen; ++i) { 763 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') { 764 asof = hammer_str_to_tid(ncp->nc_name + i + 2, 765 &ispfs, &localization); 766 if (asof != HAMMER_MAX_TID) 767 flags |= HAMMER_INODE_RO; 768 break; 769 } 770 } 771 nlen = i; 772 773 /* 774 * If this is a PFS softlink we dive into the PFS 775 */ 776 if (ispfs && nlen == 0) { 777 ip = hammer_get_inode(&trans, dip, HAMMER_OBJID_ROOT, 778 asof, localization, 779 flags, &error); 780 if (error == 0) { 781 error = hammer_get_vnode(ip, &vp); 782 hammer_rel_inode(ip, 0); 783 } else { 784 vp = NULL; 785 } 786 if (error == 0) { 787 vn_unlock(vp); 788 cache_setvp(ap->a_nch, vp); 789 vrele(vp); 790 } 791 goto done; 792 } 793 794 /* 795 * If there is no path component the time extension is relative to 796 * dip. 797 */ 798 if (nlen == 0) { 799 ip = hammer_get_inode(&trans, dip, dip->obj_id, 800 asof, dip->obj_localization, 801 flags, &error); 802 if (error == 0) { 803 error = hammer_get_vnode(ip, &vp); 804 hammer_rel_inode(ip, 0); 805 } else { 806 vp = NULL; 807 } 808 if (error == 0) { 809 vn_unlock(vp); 810 cache_setvp(ap->a_nch, vp); 811 vrele(vp); 812 } 813 goto done; 814 } 815 816 /* 817 * Calculate the namekey and setup the key range for the scan. This 818 * works kinda like a chained hash table where the lower 32 bits 819 * of the namekey synthesize the chain. 820 * 821 * The key range is inclusive of both key_beg and key_end. 
822 */ 823 namekey = hammer_directory_namekey(ncp->nc_name, nlen); 824 825 error = hammer_init_cursor(&trans, &cursor, &dip->cache[1], dip); 826 cursor.key_beg.localization = dip->obj_localization + 827 HAMMER_LOCALIZE_MISC; 828 cursor.key_beg.obj_id = dip->obj_id; 829 cursor.key_beg.key = namekey; 830 cursor.key_beg.create_tid = 0; 831 cursor.key_beg.delete_tid = 0; 832 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 833 cursor.key_beg.obj_type = 0; 834 835 cursor.key_end = cursor.key_beg; 836 cursor.key_end.key |= 0xFFFFFFFFULL; 837 cursor.asof = asof; 838 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 839 840 /* 841 * Scan all matching records (the chain), locate the one matching 842 * the requested path component. 843 * 844 * The hammer_ip_*() functions merge in-memory records with on-disk 845 * records for the purposes of the search. 846 */ 847 obj_id = 0; 848 localization = HAMMER_DEF_LOCALIZATION; 849 850 if (error == 0) { 851 error = hammer_ip_first(&cursor); 852 while (error == 0) { 853 error = hammer_ip_resolve_data(&cursor); 854 if (error) 855 break; 856 if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF && 857 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) { 858 obj_id = cursor.data->entry.obj_id; 859 localization = cursor.data->entry.localization; 860 break; 861 } 862 error = hammer_ip_next(&cursor); 863 } 864 } 865 hammer_done_cursor(&cursor); 866 if (error == 0) { 867 ip = hammer_get_inode(&trans, dip, obj_id, 868 asof, localization, 869 flags, &error); 870 if (error == 0) { 871 error = hammer_get_vnode(ip, &vp); 872 hammer_rel_inode(ip, 0); 873 } else { 874 vp = NULL; 875 } 876 if (error == 0) { 877 vn_unlock(vp); 878 cache_setvp(ap->a_nch, vp); 879 vrele(vp); 880 } 881 } else if (error == ENOENT) { 882 cache_setvp(ap->a_nch, NULL); 883 } 884 done: 885 hammer_done_transaction(&trans); 886 return (error); 887 } 888 889 /* 890 * hammer_vop_nlookupdotdot { dvp, vpp, cred } 891 * 892 * Locate the parent directory of 
a directory vnode. 893 * 894 * dvp is referenced but not locked. *vpp must be returned referenced and 895 * locked. A parent_obj_id of 0 does not necessarily indicate that we are 896 * at the root, instead it could indicate that the directory we were in was 897 * removed. 898 * 899 * NOTE: as-of sequences are not linked into the directory structure. If 900 * we are at the root with a different asof then the mount point, reload 901 * the same directory with the mount point's asof. I'm not sure what this 902 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not 903 * get confused, but it hasn't been tested. 904 */ 905 static 906 int 907 hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 908 { 909 struct hammer_transaction trans; 910 struct hammer_inode *dip; 911 struct hammer_inode *ip; 912 int64_t parent_obj_id; 913 u_int32_t parent_obj_localization; 914 hammer_tid_t asof; 915 int error; 916 917 dip = VTOI(ap->a_dvp); 918 asof = dip->obj_asof; 919 920 /* 921 * Whos are parent? This could be the root of a pseudo-filesystem 922 * whos parent is in another localization domain. 
923 */ 924 parent_obj_id = dip->ino_data.parent_obj_id; 925 if (dip->obj_id == HAMMER_OBJID_ROOT) 926 parent_obj_localization = dip->ino_data.ext.obj.parent_obj_localization; 927 else 928 parent_obj_localization = dip->obj_localization; 929 930 if (parent_obj_id == 0) { 931 if (dip->obj_id == HAMMER_OBJID_ROOT && 932 asof != dip->hmp->asof) { 933 parent_obj_id = dip->obj_id; 934 asof = dip->hmp->asof; 935 *ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK); 936 ksnprintf(*ap->a_fakename, 19, "0x%016llx", 937 dip->obj_asof); 938 } else { 939 *ap->a_vpp = NULL; 940 return ENOENT; 941 } 942 } 943 944 hammer_simple_transaction(&trans, dip->hmp); 945 946 ip = hammer_get_inode(&trans, dip, parent_obj_id, 947 asof, parent_obj_localization, 948 dip->flags, &error); 949 if (ip) { 950 error = hammer_get_vnode(ip, ap->a_vpp); 951 hammer_rel_inode(ip, 0); 952 } else { 953 *ap->a_vpp = NULL; 954 } 955 hammer_done_transaction(&trans); 956 return (error); 957 } 958 959 /* 960 * hammer_vop_nlink { nch, dvp, vp, cred } 961 */ 962 static 963 int 964 hammer_vop_nlink(struct vop_nlink_args *ap) 965 { 966 struct hammer_transaction trans; 967 struct hammer_inode *dip; 968 struct hammer_inode *ip; 969 struct nchandle *nch; 970 int error; 971 972 nch = ap->a_nch; 973 dip = VTOI(ap->a_dvp); 974 ip = VTOI(ap->a_vp); 975 976 if (dip->flags & HAMMER_INODE_RO) 977 return (EROFS); 978 if (ip->flags & HAMMER_INODE_RO) 979 return (EROFS); 980 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_CREATE)) != 0) 981 return (error); 982 983 /* 984 * Create a transaction to cover the operations we perform. 985 */ 986 hammer_start_transaction(&trans, dip->hmp); 987 988 /* 989 * Add the filesystem object to the directory. Note that neither 990 * dip nor ip are referenced or locked, but their vnodes are 991 * referenced. This function will bump the inode's link count. 
992 */ 993 error = hammer_ip_add_directory(&trans, dip, 994 nch->ncp->nc_name, nch->ncp->nc_nlen, 995 ip); 996 997 /* 998 * Finish up. 999 */ 1000 if (error == 0) { 1001 cache_setunresolved(nch); 1002 cache_setvp(nch, ap->a_vp); 1003 } 1004 hammer_done_transaction(&trans); 1005 return (error); 1006 } 1007 1008 /* 1009 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap } 1010 * 1011 * The operating system has already ensured that the directory entry 1012 * does not exist and done all appropriate namespace locking. 1013 */ 1014 static 1015 int 1016 hammer_vop_nmkdir(struct vop_nmkdir_args *ap) 1017 { 1018 struct hammer_transaction trans; 1019 struct hammer_inode *dip; 1020 struct hammer_inode *nip; 1021 struct nchandle *nch; 1022 int error; 1023 1024 nch = ap->a_nch; 1025 dip = VTOI(ap->a_dvp); 1026 1027 if (dip->flags & HAMMER_INODE_RO) 1028 return (EROFS); 1029 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_CREATE)) != 0) 1030 return (error); 1031 1032 /* 1033 * Create a transaction to cover the operations we perform. 1034 */ 1035 hammer_start_transaction(&trans, dip->hmp); 1036 1037 /* 1038 * Create a new filesystem object of the requested type. The 1039 * returned inode will be referenced but not locked. 1040 */ 1041 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 1042 dip, NULL, &nip); 1043 if (error) { 1044 hkprintf("hammer_mkdir error %d\n", error); 1045 hammer_done_transaction(&trans); 1046 *ap->a_vpp = NULL; 1047 return (error); 1048 } 1049 /* 1050 * Add the new filesystem object to the directory. This will also 1051 * bump the inode's link count. 1052 */ 1053 error = hammer_ip_add_directory(&trans, dip, 1054 nch->ncp->nc_name, nch->ncp->nc_nlen, 1055 nip); 1056 if (error) 1057 hkprintf("hammer_mkdir (add) error %d\n", error); 1058 1059 /* 1060 * Finish up. 
1061 */ 1062 if (error) { 1063 hammer_rel_inode(nip, 0); 1064 *ap->a_vpp = NULL; 1065 } else { 1066 error = hammer_get_vnode(nip, ap->a_vpp); 1067 hammer_rel_inode(nip, 0); 1068 if (error == 0) { 1069 cache_setunresolved(ap->a_nch); 1070 cache_setvp(ap->a_nch, *ap->a_vpp); 1071 } 1072 } 1073 hammer_done_transaction(&trans); 1074 return (error); 1075 } 1076 1077 /* 1078 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap } 1079 * 1080 * The operating system has already ensured that the directory entry 1081 * does not exist and done all appropriate namespace locking. 1082 */ 1083 static 1084 int 1085 hammer_vop_nmknod(struct vop_nmknod_args *ap) 1086 { 1087 struct hammer_transaction trans; 1088 struct hammer_inode *dip; 1089 struct hammer_inode *nip; 1090 struct nchandle *nch; 1091 int error; 1092 1093 nch = ap->a_nch; 1094 dip = VTOI(ap->a_dvp); 1095 1096 if (dip->flags & HAMMER_INODE_RO) 1097 return (EROFS); 1098 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_CREATE)) != 0) 1099 return (error); 1100 1101 /* 1102 * Create a transaction to cover the operations we perform. 1103 */ 1104 hammer_start_transaction(&trans, dip->hmp); 1105 1106 /* 1107 * Create a new filesystem object of the requested type. The 1108 * returned inode will be referenced but not locked. 1109 * 1110 * If mknod specifies a directory a pseudo-fs is created. 1111 */ 1112 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 1113 dip, NULL, &nip); 1114 if (error) { 1115 hammer_done_transaction(&trans); 1116 *ap->a_vpp = NULL; 1117 return (error); 1118 } 1119 1120 /* 1121 * Add the new filesystem object to the directory. This will also 1122 * bump the inode's link count. 1123 */ 1124 error = hammer_ip_add_directory(&trans, dip, 1125 nch->ncp->nc_name, nch->ncp->nc_nlen, 1126 nip); 1127 1128 /* 1129 * Finish up. 
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		/*
		 * Obtain a vnode for the new inode and resolve the
		 * namecache entry on success.
		 */
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_open { vp, mode, cred, fp }
 *
 * Disallow opening a read-only HAMMER inode for writing, otherwise
 * fall through to the standard open code.
 */
static
int
hammer_vop_open(struct vop_open_args *ap)
{
	hammer_inode_t ip;

	ip = VTOI(ap->a_vp);

	if ((ap->a_mode & FWRITE) && (ip->flags & HAMMER_INODE_RO))
		return (EROFS);
	return(vop_stdopen(ap));
}

/*
 * hammer_vop_pathconf { vp, name, retval }
 */
static
int
hammer_vop_pathconf(struct vop_pathconf_args *ap)
{
	return EOPNOTSUPP;
}

/*
 * hammer_vop_print { vp }
 */
static
int
hammer_vop_print(struct vop_print_args *ap)
{
	return EOPNOTSUPP;
}

/*
 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
 */
static
int
hammer_vop_readdir(struct vop_readdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	struct uio *uio;
	hammer_base_elm_t base;
	int error;
	int cookie_index;
	int ncookies;
	off_t *cookies;
	off_t saveoff;
	int r;
	int dtype;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	/*
	 * If the caller wants seek cookies, estimate how many directory
	 * entries could fit in the request and cap the allocation at 1024.
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
		cookie_index = 0;
	} else {
		ncookies = -1;
		cookies = NULL;
		cookie_index = 0;
	}

	hammer_simple_transaction(&trans, ip->hmp);

	/*
	 * Handle artificial entries ("." and "..") at offsets 0 and 1.
	 */
	error = 0;
	if
(saveoff == 0) {
		r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}
	if (saveoff == 1) {
		/*
		 * ".." -- use the parent object id if present, otherwise
		 * the directory is its own parent (root).
		 */
		if (ip->ino_data.parent_obj_id) {
			r = vop_write_dirent(&error, uio,
					     ip->ino_data.parent_obj_id,
					     DT_DIR, 2, "..");
		} else {
			r = vop_write_dirent(&error, uio,
					     ip->obj_id, DT_DIR, 2, "..");
		}
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	/*
	 * Key range (begin and end inclusive) to scan.  Directory keys
	 * directly translate to a 64 bit 'seek' position.
	 */
	hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
	cursor.key_beg.localization = ip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = saveoff;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	error = hammer_ip_first(&cursor);

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		base = &cursor.leaf->base;
		saveoff = base->key;
		KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);

		if (base->obj_id != ip->obj_id)
			panic("readdir: bad record at %p", cursor.node);

		/*
		 * Convert pseudo-filesystems into softlinks
		 */
		dtype = hammer_get_dtype(cursor.leaf->base.obj_type);
		r = vop_write_dirent(
			     &error, uio, cursor.data->entry.obj_id,
			     dtype,
			     cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF,
			     (void *)cursor.data->entry.name);
		if (r)
			break;
		++saveoff;
		if (cookies)
			cookies[cookie_index] = base->key;
		++cookie_index;
		if (cookie_index == ncookies)
			break;
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);

done:
	hammer_done_transaction(&trans);

	/*
	 * ENOENT from the iteration means we hit the end of the directory.
	 * Report EOF, store the resume offset, and hand back (or free) the
	 * cookie array.
	 */
	if (ap->a_eofflag)
		*ap->a_eofflag = (error == ENOENT);
	uio->uio_offset = saveoff;
	if (error && cookie_index == 0) {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	return(error);
}

/*
 * hammer_vop_readlink { vp, uio, cred }
 */
static
int
hammer_vop_readlink(struct vop_readlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	char buf[32];
	u_int32_t localization;
	hammer_pseudofs_inmem_t pfsm;
	int error;

	ip = VTOI(ap->a_vp);

	/*
	 * Special softlink for PFS access, created by hammer pfs-create
	 */

	if (ip->obj_id == HAMMER_OBJID_ROOT && ip->obj_localization &&
	    ip->obj_asof == HAMMER_MAX_TID) {
		ksnprintf(buf, sizeof(buf), "@@0x%016llx:0x%04x",
			  ip->pfsm->pfsd.sync_end_tid,
			  ip->obj_localization >> 16);
		error = uiomove(buf, strlen(buf), ap->a_uio);
		return(error);
	}

	/*
	 * Shortcut if the symlink data was stuffed into ino_data.
	 *
	 * Also expand special @@PFSxxxxx softlinks.
	 */
	if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) {
		char *ptr;
		int bytes;

		ptr = ip->ino_data.ext.symlink;
		bytes = (int)ip->ino_data.size;
		if (bytes == 10 && strncmp(ptr, "@@PFS", 5) == 0) {
			/*
			 * "@@PFSxxxxx" (10 bytes): decode the 5-digit
			 * decimal PFS id and expand it into a
			 * "@@0x<tid>:<pfs>" softlink target.
			 */
			hammer_simple_transaction(&trans, ip->hmp);
			bcopy(ptr + 5, buf, 5);
			buf[5] = 0;
			localization = strtoul(buf, NULL, 10) << 16;
			pfsm = hammer_load_pseudofs(&trans, localization,
						    &error);
			if (error == 0) {
				ksnprintf(buf, sizeof(buf),
					  "@@0x%016llx:%05d",
					  pfsm->pfsd.sync_end_tid,
					  localization >> 16);
				ptr = buf;
				bytes = strlen(buf);
			}
			if (pfsm)
				hammer_rel_pseudofs(trans.hmp, pfsm);
			hammer_done_transaction(&trans);
		}
		error = uiomove(ptr, bytes, ap->a_uio);
		return(error);
	}

	/*
	 * Long version -- the link target is stored in a separate
	 * B-Tree record.
	 */
	hammer_simple_transaction(&trans, ip->hmp);
	hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);

	/*
	 * Key range (begin and end inclusive) to scan.  Directory keys
	 * directly translate to a 64 bit 'seek' position.
	 */
	cursor.key_beg.localization = ip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	error = hammer_ip_lookup(&cursor);
	if (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error == 0) {
			KKASSERT(cursor.leaf->data_len >=
				 HAMMER_SYMLINK_NAME_OFF);
			error = uiomove(cursor.data->symlink.name,
					cursor.leaf->data_len -
					HAMMER_SYMLINK_NAME_OFF,
					ap->a_uio);
		}
	}
	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);
	return(error);
}

/*
 * hammer_vop_nremove { nch, dvp, cred }
 */
static
int
hammer_vop_nremove(struct vop_nremove_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	int error;

	dip = VTOI(ap->a_dvp);

	/*
	 * The space check is skipped when history is disabled because
	 * removal then frees space rather than consuming it.
	 */
	if (hammer_nohistory(dip) == 0 &&
	    (error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_REMOVE)) != 0) {
		return (error);
	}

	hammer_start_transaction(&trans, dip->hmp);
	error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
	hammer_done_transaction(&trans);

	return (error);
}

/*
 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 */
static
int
hammer_vop_nrename(struct vop_nrename_args *ap)
{
	struct hammer_transaction trans;
	struct namecache *fncp;
	struct namecache *tncp;
	struct hammer_inode *fdip;
	struct hammer_inode *tdip;
	struct hammer_inode *ip;
	struct hammer_cursor cursor;
	int64_t namekey;
	int nlen, error;

	fdip = VTOI(ap->a_fdvp);
	tdip = VTOI(ap->a_tdvp);
	fncp = ap->a_fnch->ncp;
	tncp = ap->a_tnch->ncp;
	ip = VTOI(fncp->nc_vp);
	KKASSERT(ip != NULL);

	/*
	 * Both directories and the renamed inode itself must be writable.
	 */
	if (fdip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (tdip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(fdip->hmp, HAMMER_CHECKSPACE_SLOP_CREATE)) != 0)
		return (error);

	hammer_start_transaction(&trans, fdip->hmp);

	/*
	 * Remove tncp from the target directory and then link ip as
	 * tncp. XXX pass trans to dounlink
	 *
	 * Force the inode sync-time to match the transaction so it is
	 * in-sync with the creation of the target directory entry.
	 */
	error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
	if (error == 0 || error == ENOENT) {
		error = hammer_ip_add_directory(&trans, tdip,
						tncp->nc_name, tncp->nc_nlen,
						ip);
		if (error == 0) {
			ip->ino_data.parent_obj_id = tdip->obj_id;
			hammer_modify_inode(ip, HAMMER_INODE_DDIRTY);
		}
	}
	if (error)
		goto failed; /* XXX */

	/*
	 * Locate the record in the originating directory and remove it.
	 *
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
retry:
	hammer_init_cursor(&trans, &cursor, &fdip->cache[1], fdip);
	cursor.key_beg.localization = fdip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = fdip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = fdip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	error = hammer_ip_first(&cursor);
	while (error == 0) {
		if (hammer_ip_resolve_data(&cursor) != 0)
			break;
		nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
		KKASSERT(nlen > 0);
		if (fncp->nc_nlen == nlen &&
		    bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) {
			break;
		}
		error = hammer_ip_next(&cursor);
	}

	/*
	 * If all is ok we have to get the inode so we can adjust nlinks.
	 *
	 * WARNING: hammer_ip_del_directory() may have to terminate the
	 * cursor to avoid a recursion.  It's ok to call hammer_done_cursor()
	 * twice.
	 */
	if (error == 0)
		error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);

	/*
	 * XXX A deadlock here will break rename's atomicy for the purposes
	 * of crash recovery.
	 */
	if (error == EDEADLK) {
		hammer_done_cursor(&cursor);
		goto retry;
	}

	/*
	 * Cleanup and tell the kernel that the rename succeeded.
	 */
	hammer_done_cursor(&cursor);
	if (error == 0)
		cache_rename(ap->a_fnch, ap->a_tnch);

failed:
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nrmdir { nch, dvp, cred }
 */
static
int
hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	int error;

	dip = VTOI(ap->a_dvp);

	/*
	 * The space check is skipped when history is disabled because
	 * removal then frees space rather than consuming it.
	 */
	if (hammer_nohistory(dip) == 0 &&
	    (error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_REMOVE)) != 0) {
		return (error);
	}

	hammer_start_transaction(&trans, dip->hmp);
	error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
	hammer_done_transaction(&trans);

	return (error);
}

/*
 * hammer_vop_setattr { vp, vap, cred }
 */
static
int
hammer_vop_setattr(struct vop_setattr_args *ap)
{
	struct hammer_transaction trans;
	struct vattr *vap;
	struct hammer_inode *ip;
	int modflags;
	int error;
	int truncating;
	int blksize;
	int64_t aligned_size;
	u_int32_t flags;

	vap = ap->a_vap;
	ip = ap->a_vp->v_data;
	modflags = 0;

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return(EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (hammer_nohistory(ip) == 0 &&
	    (error = hammer_checkspace(ip->hmp, HAMMER_CHECKSPACE_SLOP_REMOVE)) != 0) {
		return (error);
	}

	hammer_start_transaction(&trans, ip->hmp);
	error = 0;

	/*
	 * chflags -- when flags are being set this is the only attribute
	 * processed; the function finishes via 'goto done' below.
	 */
	if (vap->va_flags != VNOVAL) {
		flags = ip->ino_data.uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					 hammer_to_unix_xid(&ip->ino_data.uid),
					 ap->a_cred);
		if (error == 0) {
			if (ip->ino_data.uflags != flags) {
				ip->ino_data.uflags = flags;
				modflags |= HAMMER_INODE_DDIRTY;
			}
			if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
				error
= 0;
				goto done;
			}
		}
		goto done;
	}
	/*
	 * Any other attribute change is refused while the inode is
	 * immutable or append-only.
	 */
	if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	/*
	 * chown/chgrp
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ip->ino_data.mode;
		uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
		gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer_guid_to_uuid(&uuid_uid, cur_uid);
			hammer_guid_to_uuid(&uuid_gid, cur_gid);
			if (bcmp(&uuid_uid, &ip->ino_data.uid,
				 sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ip->ino_data.gid,
				 sizeof(uuid_gid)) ||
			    ip->ino_data.mode != cur_mode
			) {
				ip->ino_data.uid = uuid_uid;
				ip->ino_data.gid = uuid_gid;
				ip->ino_data.mode = cur_mode;
			}
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
	/*
	 * truncate/extend.  NOTE(review): a one-shot while loop (ends with
	 * an unconditional break) -- used so 'break' inside the switch can
	 * exit the whole size-change section.
	 */
	while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) {
		switch(ap->a_vp->v_type) {
		case VREG:
			if (vap->va_size == ip->ino_data.size)
				break;
			/*
			 * XXX break atomicy, we can deadlock the backend
			 * if we do not release the lock.  Probably not a
			 * big deal here.
			 */
			blksize = hammer_blocksize(vap->va_size);
			if (vap->va_size < ip->ino_data.size) {
				vtruncbuf(ap->a_vp, vap->va_size, blksize);
				truncating = 1;
			} else {
				vnode_pager_setsize(ap->a_vp, vap->va_size);
				truncating = 0;
			}
			ip->ino_data.size = vap->va_size;
			modflags |= HAMMER_INODE_DDIRTY;

			/*
			 * on-media truncation is cached in the inode until
			 * the inode is synchronized.
			 */
			if (truncating) {
				hammer_ip_frontend_trunc(ip, vap->va_size);
#ifdef DEBUG_TRUNCATE
				if (HammerTruncIp == NULL)
					HammerTruncIp = ip;
#endif
				if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
					ip->flags |= HAMMER_INODE_TRUNCATED;
					ip->trunc_off = vap->va_size;
#ifdef DEBUG_TRUNCATE
					if (ip == HammerTruncIp)
						kprintf("truncate1 %016llx\n", ip->trunc_off);
#endif
				} else if (ip->trunc_off > vap->va_size) {
					ip->trunc_off = vap->va_size;
#ifdef DEBUG_TRUNCATE
					if (ip == HammerTruncIp)
						kprintf("truncate2 %016llx\n", ip->trunc_off);
#endif
				} else {
#ifdef DEBUG_TRUNCATE
					if (ip == HammerTruncIp)
						kprintf("truncate3 %016llx (ignored)\n", vap->va_size);
#endif
				}
			}

			/*
			 * If truncating we have to clean out a portion of
			 * the last block on-disk.  We do this in the
			 * front-end buffer cache.
			 */
			aligned_size = (vap->va_size + (blksize - 1)) &
				       ~(int64_t)(blksize - 1);
			if (truncating && vap->va_size < aligned_size) {
				struct buf *bp;
				int offset;

				aligned_size -= blksize;

				offset = (int)vap->va_size & (blksize - 1);
				error = bread(ap->a_vp, aligned_size,
					      blksize, &bp);
				hammer_ip_frontend_trunc(ip, aligned_size);
				if (error == 0) {
					bzero(bp->b_data + offset,
					      blksize - offset);
					bdwrite(bp);
				} else {
					kprintf("ERROR %d\n", error);
					brelse(bp);
				}
			}
			break;
		case VDATABASE:
			if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
				ip->flags |= HAMMER_INODE_TRUNCATED;
				ip->trunc_off = vap->va_size;
			} else if (ip->trunc_off > vap->va_size) {
				ip->trunc_off = vap->va_size;
			}
			hammer_ip_frontend_trunc(ip, vap->va_size);
			ip->ino_data.size = vap->va_size;
			modflags |= HAMMER_INODE_DDIRTY;
			break;
		default:
			error = EINVAL;
			goto done;
		}
		break;
	}
	if (vap->va_atime.tv_sec
!= VNOVAL) {
		ip->ino_data.atime =
			hammer_timespec_to_time(&vap->va_atime);
		modflags |= HAMMER_INODE_ATIME;
	}
	if (vap->va_mtime.tv_sec != VNOVAL) {
		ip->ino_data.mtime =
			hammer_timespec_to_time(&vap->va_mtime);
		modflags |= HAMMER_INODE_MTIME;
	}
	/*
	 * chmod
	 */
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ip->ino_data.mode;
		uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
		gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0 && ip->ino_data.mode != cur_mode) {
			ip->ino_data.mode = cur_mode;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
done:
	if (error == 0)
		hammer_modify_inode(ip, modflags);
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 */
static
int
hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	hammer_record_t record;
	int error;
	int bytes;

	ap->a_vap->va_type = VLNK;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_CREATE)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */

	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
				    dip, NULL, &nip);
	if (error) {
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add a record representing the symlink.  symlink stores the link
	 * as pure data, not a string, and is not \0-terminated.
	 */
	if (error == 0) {
		bytes = strlen(ap->a_target);

		if (bytes <= HAMMER_INODE_BASESYMLEN) {
			/* short target fits directly in the inode data */
			bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes);
		} else {
			/* long target goes into a separate FIX record */
			record = hammer_alloc_mem_record(nip, bytes);
			record->type = HAMMER_MEM_RECORD_GENERAL;

			record->leaf.base.localization = nip->obj_localization +
							 HAMMER_LOCALIZE_MISC;
			record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
			record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
			record->leaf.data_len = bytes;
			KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
			bcopy(ap->a_target, record->data->symlink.name, bytes);
			error = hammer_ip_add_record(&trans, record);
		}

		/*
		 * Set the file size to the length of the link.
		 */
		if (error == 0) {
			nip->ino_data.size = bytes;
			hammer_modify_inode(nip, HAMMER_INODE_DDIRTY);
		}
	}
	if (error == 0)
		error = hammer_ip_add_directory(&trans, dip, nch->ncp->nc_name,
						nch->ncp->nc_nlen, nip);

	/*
	 * Finish up.
1901 */ 1902 if (error) { 1903 hammer_rel_inode(nip, 0); 1904 *ap->a_vpp = NULL; 1905 } else { 1906 error = hammer_get_vnode(nip, ap->a_vpp); 1907 hammer_rel_inode(nip, 0); 1908 if (error == 0) { 1909 cache_setunresolved(ap->a_nch); 1910 cache_setvp(ap->a_nch, *ap->a_vpp); 1911 } 1912 } 1913 hammer_done_transaction(&trans); 1914 return (error); 1915 } 1916 1917 /* 1918 * hammer_vop_nwhiteout { nch, dvp, cred, flags } 1919 */ 1920 static 1921 int 1922 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap) 1923 { 1924 struct hammer_transaction trans; 1925 struct hammer_inode *dip; 1926 int error; 1927 1928 dip = VTOI(ap->a_dvp); 1929 1930 if (hammer_nohistory(dip) == 0 && 1931 (error = hammer_checkspace(dip->hmp, HAMMER_CHECKSPACE_SLOP_CREATE)) != 0) { 1932 return (error); 1933 } 1934 1935 hammer_start_transaction(&trans, dip->hmp); 1936 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, 1937 ap->a_cred, ap->a_flags); 1938 hammer_done_transaction(&trans); 1939 1940 return (error); 1941 } 1942 1943 /* 1944 * hammer_vop_ioctl { vp, command, data, fflag, cred } 1945 */ 1946 static 1947 int 1948 hammer_vop_ioctl(struct vop_ioctl_args *ap) 1949 { 1950 struct hammer_inode *ip = ap->a_vp->v_data; 1951 1952 return(hammer_ioctl(ip, ap->a_command, ap->a_data, 1953 ap->a_fflag, ap->a_cred)); 1954 } 1955 1956 static 1957 int 1958 hammer_vop_mountctl(struct vop_mountctl_args *ap) 1959 { 1960 struct mount *mp; 1961 int error; 1962 1963 mp = ap->a_head.a_ops->head.vv_mount; 1964 1965 switch(ap->a_op) { 1966 case MOUNTCTL_SET_EXPORT: 1967 if (ap->a_ctllen != sizeof(struct export_args)) 1968 error = EINVAL; 1969 error = hammer_vfs_export(mp, ap->a_op, 1970 (const struct export_args *)ap->a_ctl); 1971 break; 1972 default: 1973 error = journal_mountctl(ap); 1974 break; 1975 } 1976 return(error); 1977 } 1978 1979 /* 1980 * hammer_vop_strategy { vp, bio } 1981 * 1982 * Strategy call, used for regular file read & write only. Note that the 1983 * bp may represent a cluster. 
 *
 * To simplify operation and allow better optimizations in the future,
 * this code does not make any assumptions with regards to buffer alignment
 * or size.
 */
static
int
hammer_vop_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp;
	int error;

	bp = ap->a_bio->bio_buf;

	/* dispatch on the buffer command; anything else is rejected */
	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		error = hammer_vop_strategy_read(ap);
		break;
	case BUF_CMD_WRITE:
		error = hammer_vop_strategy_write(ap);
		break;
	default:
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
		break;
	}
	return (error);
}

/*
 * Read from a regular file.  Iterate the related records and fill in the
 * BIO/BUF.  Gaps are zero-filled.
 *
 * The support code in hammer_object.c should be used to deal with mixed
 * in-memory and on-disk records.
 *
 * NOTE: Can be called from the cluster code with an oversized buf.
 *
 * XXX atime update
 */
static
int
hammer_vop_strategy_read(struct vop_strategy_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct hammer_cursor cursor;
	hammer_base_elm_t base;
	hammer_off_t disk_offset;
	struct bio *bio;
	struct bio *nbio;
	struct buf *bp;
	int64_t rec_offset;
	int64_t ran_end;
	int64_t tmp64;
	int error;
	int boff;
	int roff;
	int n;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = ap->a_vp->v_data;

	/*
	 * The zone-2 disk offset may have been set by the cluster code via
	 * a BMAP operation, or else should be NOOFFSET.
	 *
	 * Checking the high bits for a match against zone-2 should suffice.
	 */
	nbio = push_bio(bio);
	if ((nbio->bio_offset & HAMMER_OFF_ZONE_MASK) ==
	    HAMMER_ZONE_RAW_BUFFER) {
		error = hammer_io_direct_read(ip->hmp, nbio);
		return (error);
	}

	/*
	 * Well, that sucked.  Do it the hard way.  If all the stars are
	 * aligned we may still be able to issue a direct-read.
	 */
	hammer_simple_transaction(&trans, ip->hmp);
	hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);

	/*
	 * Key range (begin and end inclusive) to scan.  Note that the key's
	 * stored in the actual records represent BASE+LEN, not BASE.  The
	 * first record containing bio_offset will have a key > bio_offset.
	 */
	cursor.key_beg.localization = ip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = bio->bio_offset + 1;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	cursor.key_end = cursor.key_beg;
	KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
#if 0
	if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
		cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
		cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
		cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
	} else
#endif
	{
		ran_end = bio->bio_offset + bp->b_bufsize;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
		cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
		tmp64 = ran_end + MAXPHYS + 1;	/* work-around GCC-4 bug */
		if (tmp64 < ran_end)
			cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
		else
			cursor.key_end.key = ran_end + MAXPHYS + 1;
	}
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_ip_first(&cursor);
	boff = 0;

	while (error == 0) {
		/*
		 * Get the base file offset of the record.  The key for
		 * data records is (base + bytes) rather then (base).
		 */
		base = &cursor.leaf->base;
		rec_offset = base->key - cursor.leaf->data_len;

		/*
		 * Calculate the gap, if any, and zero-fill it.
		 *
		 * n is the offset of the start of the record verses our
		 * current seek offset in the bio.
		 */
		n = (int)(rec_offset - (bio->bio_offset + boff));
		if (n > 0) {
			if (n > bp->b_bufsize - boff)
				n = bp->b_bufsize - boff;
			bzero((char *)bp->b_data + boff, n);
			boff += n;
			n = 0;
		}

		/*
		 * Calculate the data offset in the record and the number
		 * of bytes we can copy.
		 *
		 * There are two degenerate cases.  First, boff may already
		 * be at bp->b_bufsize.  Secondly, the data offset within
		 * the record may exceed the record's size.
		 */
		roff = -n;
		rec_offset += roff;
		n = cursor.leaf->data_len - roff;
		if (n <= 0) {
			kprintf("strategy_read: bad n=%d roff=%d\n", n, roff);
			n = 0;
		} else if (n > bp->b_bufsize - boff) {
			n = bp->b_bufsize - boff;
		}

		/*
		 * Deal with cached truncations.  This cool bit of code
		 * allows truncate()/ftruncate() to avoid having to sync
		 * the file.
		 *
		 * If the frontend is truncated then all backend records are
		 * subject to the frontend's truncation.
		 *
		 * If the backend is truncated then backend records on-disk
		 * (but not in-memory) are subject to the backend's
		 * truncation.  In-memory records owned by the backend
		 * represent data written after the truncation point on the
		 * backend and must not be truncated.
		 *
		 * Truncate operations deal with frontend buffer cache
		 * buffers and frontend-owned in-memory records synchronously.
		 */
		if (ip->flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor) ||
			    cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
				if (ip->trunc_off <= rec_offset)
					n = 0;
				else if (ip->trunc_off < rec_offset + n)
					n = (int)(ip->trunc_off - rec_offset);
			}
		}
		if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor)) {
				if (ip->sync_trunc_off <= rec_offset)
					n = 0;
				else if (ip->sync_trunc_off < rec_offset + n)
					n = (int)(ip->sync_trunc_off - rec_offset);
			}
		}

		/*
		 * Try to issue a direct read into our bio if possible,
		 * otherwise resolve the element data into a hammer_buffer
		 * and copy.
		 *
		 * The buffer on-disk should be zerod past any real
		 * truncation point, but may not be for any synthesized
		 * truncation point from above.
		 */
		if (boff == 0 && n == bp->b_bufsize &&
		    ((cursor.leaf->data_offset + roff) & HAMMER_BUFMASK) == 0) {
			disk_offset = hammer_blockmap_lookup(
						trans.hmp,
						cursor.leaf->data_offset + roff,
						&error);
			if (error)
				break;
			nbio->bio_offset = disk_offset;
			error = hammer_io_direct_read(trans.hmp, nbio);
			goto done;
		} else if (n) {
			error = hammer_ip_resolve_data(&cursor);
			if (error == 0) {
				bcopy((char *)cursor.data + roff,
				      (char *)bp->b_data + boff, n);
			}
		}
		if (error)
			break;

		/*
		 * Iterate until we have filled the request.
		 */
		boff += n;
		if (boff == bp->b_bufsize)
			break;
		error = hammer_ip_next(&cursor);
	}

	/*
	 * There may have been a gap after the last record
	 */
	if (error == ENOENT)
		error = 0;
	if (error == 0 && boff != bp->b_bufsize) {
		KKASSERT(boff < bp->b_bufsize);
		bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
		/* boff = bp->b_bufsize; */
	}
	bp->b_resid = 0;
	bp->b_error = error;
	if (error)
		bp->b_flags |= B_ERROR;
	biodone(ap->a_bio);

done:
	/* cache the cursor position for the next scan of this inode */
	if (cursor.node)
		hammer_cache_node(&ip->cache[1], cursor.node);
	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);
	return(error);
}

/*
 * BMAP operation - used to support cluster_read() only.
 *
 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
 *
 * This routine may return EOPNOTSUPP if the opration is not supported for
 * the specified offset.  The contents of the pointer arguments do not
 * need to be initialized in that case.
 *
 * If a disk address is available and properly aligned return 0 with
 * *doffsetp set to the zone-2 address, and *runp / *runb set appropriately
 * to the run-length relative to that offset.  Callers may assume that
 * *doffsetp is valid if 0 is returned, even if *runp is not sufficiently
 * large, so return EOPNOTSUPP if it is not sufficiently large.
 */
static
int
hammer_vop_bmap(struct vop_bmap_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct hammer_cursor cursor;
	hammer_base_elm_t base;
	int64_t rec_offset;		/* file offset of current record's base */
	int64_t ran_end;		/* end of the forward scan range */
	int64_t tmp64;
	int64_t base_offset;		/* file offset of current contiguous run */
	int64_t base_disk_offset;	/* zone-2 offset of current contiguous run */
	int64_t last_offset;		/* file offset one past last contiguous byte */
	hammer_off_t last_disk_offset;	/* zone-2 offset one past last contiguous byte */
	hammer_off_t disk_offset;
	int rec_len;
	int error;
	int blksize;

	ip = ap->a_vp->v_data;

	/*
	 * We can only BMAP regular files.  We can't BMAP database files,
	 * directories, etc.
	 */
	if (ip->ino_data.obj_type != HAMMER_OBJTYPE_REGFILE)
		return(EOPNOTSUPP);

	/*
	 * bmap is typically called with runp/runb both NULL when used
	 * for writing.  We do not support BMAP for writing atm.
	 */
	if (ap->a_cmd != BUF_CMD_READ)
		return(EOPNOTSUPP);

	/*
	 * Scan the B-Tree to acquire blockmap addresses, then translate
	 * to raw addresses.
	 */
	hammer_simple_transaction(&trans, ip->hmp);
#if 0
	kprintf("bmap_beg %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
#endif
	hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);

	/*
	 * Key range (begin and end inclusive) to scan.  Note that the key's
	 * stored in the actual records represent BASE+LEN, not BASE.  The
	 * first record containing bio_offset will have a key > bio_offset.
	 *
	 * When a backwards run (a_runb) is requested, begin the scan up to
	 * MAXPHYS before the requested offset so the run-behind can be
	 * accumulated; otherwise start just past the requested offset.
	 */
	cursor.key_beg.localization = ip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	if (ap->a_runb)
		cursor.key_beg.key = ap->a_loffset - MAXPHYS + 1;
	else
		cursor.key_beg.key = ap->a_loffset + 1;
	if (cursor.key_beg.key < 0)
		cursor.key_beg.key = 0;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	cursor.key_end = cursor.key_beg;
	KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);

	ran_end = ap->a_loffset + MAXPHYS;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
	cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
	tmp64 = ran_end + MAXPHYS + 1;	/* work-around GCC-4 bug */
	if (tmp64 < ran_end)
		cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;	/* clamp on signed overflow */
	else
		cursor.key_end.key = ran_end + MAXPHYS + 1;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_ip_first(&cursor);
	base_offset = last_offset = 0;
	base_disk_offset = last_disk_offset = 0;

	while (error == 0) {
		/*
		 * Get the base file offset of the record.  The key for
		 * data records is (base + bytes) rather than (base).
		 *
		 * NOTE: rec_offset + rec_len may exceed the end-of-file.
		 * The extra bytes should be zero on-disk and the BMAP op
		 * should still be ok.
		 */
		base = &cursor.leaf->base;
		rec_offset = base->key - cursor.leaf->data_len;
		rec_len = cursor.leaf->data_len;

		/*
		 * Incorporate any cached truncation.
		 *
		 * NOTE: Modifications to rec_len based on synthesized
		 * truncation points remove the guarantee that any extended
		 * data on disk is zero (since the truncations may not have
		 * taken place on-media yet).
		 */
		if (ip->flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor) ||
			    cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
				if (ip->trunc_off <= rec_offset)
					rec_len = 0;
				else if (ip->trunc_off < rec_offset + rec_len)
					rec_len = (int)(ip->trunc_off - rec_offset);
			}
		}
		if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor)) {
				if (ip->sync_trunc_off <= rec_offset)
					rec_len = 0;
				else if (ip->sync_trunc_off < rec_offset + rec_len)
					rec_len = (int)(ip->sync_trunc_off - rec_offset);
			}
		}

		/*
		 * Accumulate information.  If we have hit a discontiguous
		 * block reset base_offset unless we are already beyond the
		 * requested offset.  If we are, that's it, we stop.
		 */
		disk_offset = hammer_blockmap_lookup(trans.hmp,
						     cursor.leaf->data_offset,
						     &error);
		if (error)
			break;
		if (rec_offset != last_offset ||
		    disk_offset != last_disk_offset) {
			if (rec_offset > ap->a_loffset)
				break;
			base_offset = rec_offset;
			base_disk_offset = disk_offset;
		}
		last_offset = rec_offset + rec_len;
		last_disk_offset = disk_offset + rec_len;

		error = hammer_ip_next(&cursor);
	}

#if 0
	kprintf("BMAP %016llx: %016llx - %016llx\n",
		ap->a_loffset, base_offset, last_offset);
	kprintf("BMAP %16s: %016llx - %016llx\n",
		"", base_disk_offset, last_disk_offset);
#endif

	if (cursor.node) {
		/* Cache the node for the next lookup on this inode */
		hammer_cache_node(&ip->cache[1], cursor.node);
#if 0
		kprintf("bmap_end2 %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
#endif
	}
	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);

	/*
	 * If we couldn't find any records or the records we did find were
	 * all behind the requested offset, return failure.  A forward
	 * truncation can leave a hole w/ no on-disk records.
	 */
	if (last_offset == 0 || last_offset < ap->a_loffset)
		return (EOPNOTSUPP);

	/*
	 * Figure out the block size at the requested offset and adjust
	 * our limits so the cluster_read() does not create inappropriately
	 * sized buffer cache buffers.
	 */
	blksize = hammer_blocksize(ap->a_loffset);
	if (hammer_blocksize(base_offset) != blksize) {
		base_offset = hammer_blockdemarc(base_offset, ap->a_loffset);
	}
	if (last_offset != ap->a_loffset &&
	    hammer_blocksize(last_offset - 1) != blksize) {
		last_offset = hammer_blockdemarc(ap->a_loffset,
						 last_offset - 1);
	}

	/*
	 * Returning EOPNOTSUPP simply prevents the direct-IO optimization
	 * from occurring.
	 */
	disk_offset = base_disk_offset + (ap->a_loffset - base_offset);

	/*
	 * If doffsetp is not aligned or the forward run size does
	 * not cover a whole buffer, disallow the direct I/O.
	 */
	if ((disk_offset & HAMMER_BUFMASK) ||
	    (last_offset - ap->a_loffset) < blksize) {
		error = EOPNOTSUPP;
	} else {
		*ap->a_doffsetp = disk_offset;
		if (ap->a_runb) {
			*ap->a_runb = ap->a_loffset - base_offset;
			KKASSERT(*ap->a_runb >= 0);
		}
		if (ap->a_runp) {
			*ap->a_runp = last_offset - ap->a_loffset;
			KKASSERT(*ap->a_runp >= 0);
		}
		error = 0;
	}
	return(error);
}

/*
 * Write to a regular file.  Because this is a strategy call the OS is
 * trying to actually get data onto the media.
 */
static
int
hammer_vop_strategy_write(struct vop_strategy_args *ap)
{
	hammer_record_t record;
	hammer_mount_t hmp;
	hammer_inode_t ip;
	struct bio *bio;
	struct buf *bp;
	int blksize;
	int bytes;
	int error;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = ap->a_vp->v_data;
	hmp = ip->hmp;

	blksize = hammer_blocksize(bio->bio_offset);
	KKASSERT(bp->b_bufsize == blksize);

	/*
	 * Writes to a read-only filesystem/inode fail the bio immediately.
	 */
	if (ip->flags & HAMMER_INODE_RO) {
		bp->b_error = EROFS;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
		return(EROFS);
	}

	/*
	 * Interlock with inode destruction (no in-kernel or directory
	 * topology visibility).  If we queue new IO while trying to
	 * destroy the inode we can deadlock the vtrunc call in
	 * hammer_inode_unloadable_check().
	 */
	if (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) {
		bp->b_resid = 0;
		biodone(ap->a_bio);
		return(0);
	}

	/*
	 * Reserve space and issue a direct-write from the front-end.
	 * NOTE: The direct_io code will hammer_bread/bcopy smaller
	 * allocations.
	 *
	 * An in-memory record will be installed to reference the storage
	 * until the flusher can get to it.
	 *
	 * Since we own the high level bio the front-end will not try to
	 * do a direct-read until the write completes.
	 *
	 * NOTE: The only time we do not reserve a full-sized buffers
	 * worth of data is if the file is small.  We do not try to
	 * allocate a fragment (from the small-data zone) at the end of
	 * an otherwise large file as this can lead to wildly separated
	 * data.
	 */
	KKASSERT((bio->bio_offset & HAMMER_BUFMASK) == 0);
	KKASSERT(bio->bio_offset < ip->ino_data.size);
	if (bio->bio_offset || ip->ino_data.size > HAMMER_BUFSIZE / 2)
		bytes = bp->b_bufsize;
	else
		bytes = ((int)ip->ino_data.size + 15) & ~15;	/* round up to 16 */

	record = hammer_ip_add_bulk(ip, bio->bio_offset, bp->b_data,
				    bytes, &error);
	if (record) {
		hammer_io_direct_write(hmp, &record->leaf, bio);
		hammer_rel_mem_record(record);
		/* throttle: kick the flusher when reserved records pile up */
		if (ip->rsv_recs > 1 && hmp->rsv_recs > hammer_limit_recs)
			hammer_flush_inode(ip, 0);
	} else {
		/* bulk add failed; complete the bio with the error */
		bp->b_bio2.bio_offset = NOOFFSET;
		bp->b_error = error;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
	}
	return(error);
}

/*
 * dounlink - disconnect a directory entry
 *
 * Removes the directory entry for (nch) from the directory (dvp), adjusting
 * the target inode's links via hammer_ip_del_directory().  Retries the
 * whole operation on EDEADLK.
 *
 * XXX whiteout support not really in yet
 * NOTE(review): cred and flags are accepted but not referenced in this
 * function body — presumably reserved for permission checks / whiteouts.
 */
static int
hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
		struct vnode *dvp, struct ucred *cred, int flags)
{
	struct namecache *ncp;
	hammer_inode_t dip;	/* directory inode */
	hammer_inode_t ip;	/* inode of the entry being unlinked */
	struct hammer_cursor cursor;
	int64_t namekey;
	int nlen, error;

	/*
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	dip = VTOI(dvp);
	ncp = nch->ncp;

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
retry:
	hammer_init_cursor(trans, &cursor, &dip->cache[1], dip);
	cursor.key_beg.localization = dip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	/* end of the hash chain: same key with low 32 bits saturated */
	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = dip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.  info->last_error contains the
	 * error code on search termination and could be 0, ENOENT, or
	 * something else.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	error = hammer_ip_first(&cursor);

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
		KKASSERT(nlen > 0);
		if (ncp->nc_nlen == nlen &&
		    bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
			break;	/* found the matching directory entry */
		}
		error = hammer_ip_next(&cursor);
	}

	/*
	 * If all is ok we have to get the inode so we can adjust nlinks.
	 * To avoid a deadlock with the flusher we must release the inode
	 * lock on the directory when acquiring the inode for the entry.
	 *
	 * If the target is a directory, it must be empty.
	 */
	if (error == 0) {
		hammer_unlock(&cursor.ip->lock);
		ip = hammer_get_inode(trans, dip, cursor.data->entry.obj_id,
				      dip->hmp->asof,
				      cursor.data->entry.localization,
				      0, &error);
		hammer_lock_sh(&cursor.ip->lock);
		if (error == ENOENT) {
			/* directory entry references a non-existent inode */
			kprintf("obj_id %016llx\n", cursor.data->entry.obj_id);
			Debugger("ENOENT unlinking object that should exist");
		}

		/*
		 * If we are trying to remove a directory the directory must
		 * be empty.
		 *
		 * WARNING: hammer_ip_check_directory_empty() may have to
		 * terminate the cursor to avoid a deadlock.  It is ok to
		 * call hammer_done_cursor() twice.
		 */
		if (error == 0 && ip->ino_data.obj_type ==
				  HAMMER_OBJTYPE_DIRECTORY) {
			error = hammer_ip_check_directory_empty(trans, ip);
		}

		/*
		 * Delete the directory entry.
		 *
		 * WARNING: hammer_ip_del_directory() may have to terminate
		 * the cursor to avoid a deadlock.  It is ok to call
		 * hammer_done_cursor() twice.
		 */
		if (error == 0) {
			error = hammer_ip_del_directory(trans, &cursor,
							dip, ip);
		}
		hammer_done_cursor(&cursor);
		if (error == 0) {
			/* invalidate the name cache entry for the removed name */
			cache_setunresolved(nch);
			cache_setvp(nch, NULL);
			/* XXX locking */
			if (ip->vp)
				cache_inval_vp(ip->vp, CINV_DESTROY);
		}
		if (ip)
			hammer_rel_inode(ip, 0);
	} else {
		hammer_done_cursor(&cursor);
	}
	if (error == EDEADLK)
		goto retry;	/* cursor was torn down; restart the lookup */

	return (error);
}

/************************************************************************
 *			    FIFO AND SPECFS OPS				*
 ************************************************************************
 *
 * Thin pass-through wrappers that forward fifo/special-file operations
 * to the generic fifofs / specfs vnode operation vectors.
 */

static int
hammer_vop_fifoclose (struct vop_close_args *ap)
{
	/* XXX update itimes */
	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
}

static int
hammer_vop_fiforead (struct vop_read_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	/* XXX update access time */
	return (error);
}

static int
hammer_vop_fifowrite (struct vop_write_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	/* XXX update access time */
	return (error);
}

static int
hammer_vop_specclose (struct vop_close_args *ap)
{
	/* XXX update itimes */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

static int
hammer_vop_specread (struct vop_read_args *ap)
{
	/* XXX update access time */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

static int
hammer_vop_specwrite (struct vop_write_args *ap)
{
	/* XXX update last change time */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}