1 /* 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.96 2008/08/09 07:04:16 dillon Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/fcntl.h> 41 #include <sys/namecache.h> 42 #include <sys/vnode.h> 43 #include <sys/lockf.h> 44 #include <sys/event.h> 45 #include <sys/stat.h> 46 #include <sys/dirent.h> 47 #include <vm/vm_extern.h> 48 #include <vfs/fifofs/fifo.h> 49 #include "hammer.h" 50 51 /* 52 * USERFS VNOPS 53 */ 54 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/ 55 static int hammer_vop_fsync(struct vop_fsync_args *); 56 static int hammer_vop_read(struct vop_read_args *); 57 static int hammer_vop_write(struct vop_write_args *); 58 static int hammer_vop_access(struct vop_access_args *); 59 static int hammer_vop_advlock(struct vop_advlock_args *); 60 static int hammer_vop_close(struct vop_close_args *); 61 static int hammer_vop_ncreate(struct vop_ncreate_args *); 62 static int hammer_vop_getattr(struct vop_getattr_args *); 63 static int hammer_vop_nresolve(struct vop_nresolve_args *); 64 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *); 65 static int hammer_vop_nlink(struct vop_nlink_args *); 66 static int hammer_vop_nmkdir(struct vop_nmkdir_args *); 67 static int hammer_vop_nmknod(struct vop_nmknod_args *); 68 static int hammer_vop_open(struct vop_open_args *); 69 static int hammer_vop_pathconf(struct vop_pathconf_args *); 70 static int hammer_vop_print(struct vop_print_args *); 71 static int hammer_vop_readdir(struct vop_readdir_args *); 72 static int hammer_vop_readlink(struct vop_readlink_args *); 73 static int hammer_vop_nremove(struct vop_nremove_args *); 74 static int hammer_vop_nrename(struct vop_nrename_args *); 75 static int hammer_vop_nrmdir(struct vop_nrmdir_args *); 76 static int hammer_vop_setattr(struct vop_setattr_args *); 77 static int hammer_vop_strategy(struct vop_strategy_args *); 78 static int hammer_vop_bmap(struct vop_bmap_args *ap); 79 
static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
static int hammer_vop_ioctl(struct vop_ioctl_args *);
static int hammer_vop_mountctl(struct vop_mountctl_args *);

static int hammer_vop_fifoclose (struct vop_close_args *);
static int hammer_vop_fiforead (struct vop_read_args *);
static int hammer_vop_fifowrite (struct vop_write_args *);

static int hammer_vop_specclose (struct vop_close_args *);
static int hammer_vop_specread (struct vop_read_args *);
static int hammer_vop_specwrite (struct vop_write_args *);

/*
 * Vnode operations vector for regular HAMMER files and directories.
 */
struct vop_ops hammer_vnode_vops = {
	.vop_default =		vop_defaultop,
	.vop_fsync =		hammer_vop_fsync,
	.vop_getpages =		vop_stdgetpages,
	.vop_putpages =		vop_stdputpages,
	.vop_read =		hammer_vop_read,
	.vop_write =		hammer_vop_write,
	.vop_access =		hammer_vop_access,
	.vop_advlock =		hammer_vop_advlock,
	.vop_close =		hammer_vop_close,
	.vop_ncreate =		hammer_vop_ncreate,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_nresolve =		hammer_vop_nresolve,
	.vop_nlookupdotdot =	hammer_vop_nlookupdotdot,
	.vop_nlink =		hammer_vop_nlink,
	.vop_nmkdir =		hammer_vop_nmkdir,
	.vop_nmknod =		hammer_vop_nmknod,
	.vop_open =		hammer_vop_open,
	.vop_pathconf =		hammer_vop_pathconf,
	.vop_print =		hammer_vop_print,
	.vop_readdir =		hammer_vop_readdir,
	.vop_readlink =		hammer_vop_readlink,
	.vop_nremove =		hammer_vop_nremove,
	.vop_nrename =		hammer_vop_nrename,
	.vop_nrmdir =		hammer_vop_nrmdir,
	.vop_setattr =		hammer_vop_setattr,
	.vop_bmap =		hammer_vop_bmap,
	.vop_strategy =		hammer_vop_strategy,
	.vop_nsymlink =		hammer_vop_nsymlink,
	.vop_nwhiteout =	hammer_vop_nwhiteout,
	.vop_ioctl =		hammer_vop_ioctl,
	.vop_mountctl =		hammer_vop_mountctl
};

/*
 * Vnode operations vector for device special files.  Operations not
 * listed here fall through to spec_vnoperate().
 */
struct vop_ops hammer_spec_vops = {
	.vop_default =		spec_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_specread,
	.vop_write =		hammer_vop_specwrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_specclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};

/*
 * Vnode operations vector for fifos.  Operations not listed here fall
 * through to fifo_vnoperate().
 */
struct vop_ops hammer_fifo_vops = {
	.vop_default =		fifo_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_fiforead,
	.vop_write =		hammer_vop_fifowrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_fifoclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};

#ifdef DEBUG_TRUNCATE
struct hammer_inode *HammerTruncIp;
#endif

static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
			   struct vnode *dvp, struct ucred *cred,
			   int flags, int isdir);
static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
static int hammer_vop_strategy_write(struct vop_strategy_args *ap);

#if 0
static
int
hammer_vop_vnoperate(struct vop_generic_args *)
{
	return (VOCALL(&hammer_vnode_vops, ap));
}
#endif

/*
 * hammer_vop_fsync { vp, waitfor }
 *
 * fsync() an inode to disk and wait for it to be completely committed
 * such that the information would not be undone if a crash occurred after
 * return.
 */
static
int
hammer_vop_fsync(struct vop_fsync_args *ap)
{
	hammer_inode_t ip = VTOI(ap->a_vp);

	++hammer_count_fsyncs;
	vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
	if (ap->a_waitfor == MNT_WAIT)
		hammer_wait_inode(ip);
	return (ip->error);
}

/*
 * hammer_vop_read { vp, uio, ioflag, cred }
 *
 * Read from a regular file via the buffer cache.  Returns EINVAL for
 * non-VREG vnodes.  Updates atime unless the inode is read-only or the
 * mount is MNT_NOATIME.
 */
static
int
hammer_vop_read(struct vop_read_args *ap)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	off_t offset;
	struct buf *bp;
	struct uio *uio;
	int error;
	int n;
	int seqcount;
	int ioseqcount;
	int blksize;

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	error = 0;
	uio = ap->a_uio;

	/*
	 * Allow the UIO's size to override the sequential heuristic.
	 */
	blksize = hammer_blocksize(uio->uio_offset);
	seqcount = (uio->uio_resid + (blksize - 1)) / blksize;
	ioseqcount = ap->a_ioflag >> 16;
	if (seqcount < ioseqcount)
		seqcount = ioseqcount;

	hammer_start_transaction(&trans, ip->hmp);

	/*
	 * Access the data typically in HAMMER_BUFSIZE blocks via the
	 * buffer cache, but HAMMER may use a variable block size based
	 * on the offset.
	 */
	while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
		int64_t base_offset;
		int64_t file_limit;

		blksize = hammer_blocksize(uio->uio_offset);
		offset = (int)uio->uio_offset & (blksize - 1);
		base_offset = uio->uio_offset - offset;

		if (hammer_cluster_enable) {
			/*
			 * Use file_limit to prevent cluster_read() from
			 * creating buffers of the wrong block size past
			 * the demarc.
			 */
			file_limit = ip->ino_data.size;
			if (base_offset < HAMMER_XDEMARC &&
			    file_limit > HAMMER_XDEMARC) {
				file_limit = HAMMER_XDEMARC;
			}
			error = cluster_read(ap->a_vp,
					     file_limit, base_offset,
					     blksize, MAXPHYS,
					     seqcount, &bp);
		} else {
			error = bread(ap->a_vp, base_offset, blksize, &bp);
		}
		if (error) {
			kprintf("error %d\n", error);
			brelse(bp);
			break;
		}

		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		n = blksize - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > ip->ino_data.size - uio->uio_offset)
			n = (int)(ip->ino_data.size - uio->uio_offset);
		error = uiomove((char *)bp->b_data + offset, n, uio);

		/* data has a lower priority than meta-data */
		bp->b_flags |= B_AGE;
		bqrelse(bp);
		if (error)
			break;
		hammer_stats_file_read += n;
	}
	if ((ip->flags & HAMMER_INODE_RO) == 0 &&
	    (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
		ip->ino_data.atime = trans.time;
		hammer_modify_inode(ip, HAMMER_INODE_ATIME);
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_write { vp, uio, ioflag, cred }
 *
 * Write to a regular file via the buffer cache.  Returns EINVAL for
 * non-VREG vnodes and EROFS for read-only inodes.
 */
static
int
hammer_vop_write(struct vop_write_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	hammer_mount_t hmp;
	struct uio *uio;
	int offset;
	off_t base_offset;
	struct buf *bp;
	int error;
	int n;
	int flags;
	int delta;
	int seqcount;

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	hmp = ip->hmp;
	error = 0;
	seqcount = ap->a_ioflag >> 16;

	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, hmp);
	uio = ap->a_uio;

	/*
	 * Check append mode
	 */
	if (ap->a_ioflag & IO_APPEND)
		uio->uio_offset = ip->ino_data.size;

	/*
	 * Check for illegal write offsets.  Valid range is 0...2^63-1.
	 *
	 * NOTE: the base_off assignment is required to work around what
	 * I consider to be a GCC-4 optimization bug.
	 */
	if (uio->uio_offset < 0) {
		hammer_done_transaction(&trans);
		return (EFBIG);
	}
	base_offset = uio->uio_offset + uio->uio_resid; /* work around gcc-4 */
	if (uio->uio_resid > 0 && base_offset <= 0) {
		hammer_done_transaction(&trans);
		return (EFBIG);
	}

	/*
	 * Access the data typically in HAMMER_BUFSIZE blocks via the
	 * buffer cache, but HAMMER may use a variable block size based
	 * on the offset.
	 */
	while (uio->uio_resid > 0) {
		int fixsize = 0;
		int blksize;
		int blkmask;

		if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_WRITE)) != 0)
			break;

		blksize = hammer_blocksize(uio->uio_offset);

		/*
		 * Do not allow HAMMER to blow out the buffer cache.  Very
		 * large UIOs can lockout other processes due to bwillwrite()
		 * mechanics.
		 *
		 * The hammer inode is not locked during these operations.
		 * The vnode is locked which can interfere with the pageout
		 * daemon for non-UIO_NOCOPY writes but should not interfere
		 * with the buffer cache.  Even so, we cannot afford to
		 * allow the pageout daemon to build up too many dirty buffer
		 * cache buffers.
		 */
		/*if (((int)uio->uio_offset & (blksize - 1)) == 0)*/
		bwillwrite(blksize);

		/*
		 * Do not allow HAMMER to blow out system memory by
		 * accumulating too many records.  Records are so well
		 * decoupled from the buffer cache that it is possible
		 * for userland to push data out to the media via
		 * direct-write, but build up the records queued to the
		 * backend faster than the backend can flush them out.
		 * HAMMER has hit its write limit but the frontend has
		 * no pushback to slow it down.
		 */
		if (hmp->rsv_recs > hammer_limit_recs / 2) {
			/*
			 * Get the inode on the flush list
			 */
			if (ip->rsv_recs >= 64)
				hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
			else if (ip->rsv_recs >= 16)
				hammer_flush_inode(ip, 0);

			/*
			 * Keep the flusher going if the system keeps
			 * queueing records.
			 */
			delta = hmp->count_newrecords -
				hmp->last_newrecords;
			if (delta < 0 || delta > hammer_limit_recs / 2) {
				hmp->last_newrecords = hmp->count_newrecords;
				hammer_sync_hmp(hmp, MNT_NOWAIT);
			}

			/*
			 * If we have gotten behind start slowing
			 * down the writers.
			 */
			delta = (hmp->rsv_recs - hammer_limit_recs) *
				hz / hammer_limit_recs;
			if (delta > 0)
				tsleep(&trans, 0, "hmrslo", delta);
		}

		/*
		 * Calculate the blocksize at the current offset and figure
		 * out how much we can actually write.
		 */
		blkmask = blksize - 1;
		offset = (int)uio->uio_offset & blkmask;
		base_offset = uio->uio_offset & ~(int64_t)blkmask;
		n = blksize - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (uio->uio_offset + n > ip->ino_data.size) {
			vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
			fixsize = 1;
		}

		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ap->a_vp, base_offset,
				    blksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ap->a_vp, base_offset,
					      blksize, &bp);
			}
		} else if (offset == 0 && uio->uio_resid >= blksize) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ap->a_vp, base_offset, blksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else if (base_offset >= ip->ino_data.size) {
			/*
			 * If the base offset of the buffer is beyond the
			 * file EOF, we don't have to issue a read.
			 */
			bp = getblk(ap->a_vp, base_offset,
				    blksize, GETBLK_BHEAVY, 0);
			vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 */
			error = bread(ap->a_vp, base_offset, blksize, &bp);
			if (error == 0)
				bheavy(bp);
		}
		if (error == 0) {
			error = uiomove((char *)bp->b_data + offset,
					n, uio);
		}

		/*
		 * If we screwed up we have to undo any VM size changes we
		 * made.
		 */
		if (error) {
			brelse(bp);
			if (fixsize) {
				vtruncbuf(ap->a_vp, ip->ino_data.size,
					  hammer_blocksize(ip->ino_data.size));
			}
			break;
		}
		hammer_stats_file_write += n;
		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		if (ip->ino_data.size < uio->uio_offset) {
			ip->ino_data.size = uio->uio_offset;
			flags = HAMMER_INODE_DDIRTY;
			vnode_pager_setsize(ap->a_vp, ip->ino_data.size);
		} else {
			flags = 0;
		}
		ip->ino_data.mtime = trans.time;
		flags |= HAMMER_INODE_MTIME | HAMMER_INODE_BUFS;
		hammer_modify_inode(ip, flags);

		/*
		 * Once we dirty the buffer any cached zone-X offset
		 * becomes invalid.  HAMMER NOTE: no-history mode cannot
		 * allow overwriting over the same data sector unless
		 * we provide UNDOs for the old data, which we don't.
		 */
		bp->b_bio2.bio_offset = NOOFFSET;

		/*
		 * Final buffer disposition.
		 */
		bp->b_flags |= B_AGE;
		if (ap->a_ioflag & IO_SYNC) {
			bwrite(bp);
		} else if (ap->a_ioflag & IO_DIRECT) {
			bawrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_access { vp, mode, cred }
 */
static
int
hammer_vop_access(struct vop_access_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);
	uid_t uid;
	gid_t gid;
	int error;

	++hammer_stats_file_iopsr;
	uid = hammer_to_unix_xid(&ip->ino_data.uid);
	gid = hammer_to_unix_xid(&ip->ino_data.gid);

	error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
				  ip->ino_data.uflags);
	return (error);
}

/*
 * hammer_vop_advlock { vp, id, op, fl, flags }
 */
static
int
hammer_vop_advlock(struct vop_advlock_args *ap)
{
	hammer_inode_t ip = VTOI(ap->a_vp);

	return (lf_advlock(ap, &ip->advlock, ip->ino_data.size));
}

/*
 * hammer_vop_close { vp, fflag }
 *
 * NOTE(review): waits on inode reclaims when this inode has pending
 * modifications — presumably to throttle closers against the backend
 * flusher; confirm against hammer_inode_waitreclaims().
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
	hammer_inode_t ip = VTOI(ap->a_vp);

	if ((ip->flags | ip->sync_flags) & HAMMER_INODE_MODMASK)
		hammer_inode_waitreclaims(ip->hmp);
	return (vop_stdclose(ap));
}

/*
 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_ncreate(struct vop_ncreate_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);
	++hammer_stats_file_iopsw;

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced and shared-locked to prevent
	 * it from being moved to the flusher.
	 */

	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
				    dip, NULL, &nip);
	if (error) {
		hkprintf("hammer_create_inode error %d\n", error);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip,
					nch->ncp->nc_name, nch->ncp->nc_nlen,
					nip);
	if (error)
		hkprintf("hammer_ip_add_directory error %d\n", error);

	/*
	 * Finish up.  On success resolve the namecache entry to the new
	 * vnode; on failure drop the inode and return a NULL *vpp.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_done_transaction(&trans);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	return (error);
}

/*
 * hammer_vop_getattr { vp, vap }
 *
 * Retrieve an inode's attribute information.  When accessing inodes
 * historically we fake the atime field to ensure consistent results.
 * The atime field is stored in the B-Tree element and allowed to be
 * updated without cycling the element.
 */
static
int
hammer_vop_getattr(struct vop_getattr_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);
	struct vattr *vap = ap->a_vap;

	/*
	 * We want the fsid to be different when accessing a filesystem
	 * with different as-of's so programs like diff don't think
	 * the files are the same.
	 *
	 * We also want the fsid to be the same when comparing snapshots,
	 * or when comparing mirrors (which might be backed by different
	 * physical devices).  HAMMER fsids are based on the PFS's
	 * shared_uuid field.
	 *
	 * XXX there is a chance of collision here.  The va_fsid reported
	 * by stat is different from the more involved fsid used in the
	 * mount structure.
	 */
	++hammer_stats_file_iopsr;
	vap->va_fsid = ip->pfsm->fsid_udev ^ (u_int32_t)ip->obj_asof ^
		       (u_int32_t)(ip->obj_asof >> 32);

	vap->va_fileid = ip->ino_leaf.base.obj_id;
	vap->va_mode = ip->ino_data.mode;
	vap->va_nlink = ip->ino_data.nlinks;
	vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
	vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->ino_data.size;

	/*
	 * Special case for @@PFS softlinks.  The actual size of the
	 * expanded softlink is "@@0x%016llx:%05d" == 26 bytes.
	 */
	if (ip->ino_data.obj_type == HAMMER_OBJTYPE_SOFTLINK &&
	    ip->ino_data.size == 10 &&
	    ip->obj_asof == HAMMER_MAX_TID &&
	    ip->obj_localization == 0 &&
	    strncmp(ip->ino_data.ext.symlink, "@@PFS", 5) == 0) {
		vap->va_size = 26;
	}

	/*
	 * We must provide a consistent atime and mtime for snapshots
	 * so people can do a 'tar cf - ... | md5' on them and get
	 * consistent results.
	 */
	if (ip->flags & HAMMER_INODE_RO) {
		hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_atime);
		hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_mtime);
	} else {
		hammer_time_to_timespec(ip->ino_data.atime, &vap->va_atime);
		hammer_time_to_timespec(ip->ino_data.mtime, &vap->va_mtime);
	}
	hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
	vap->va_flags = ip->ino_data.uflags;
	vap->va_gen = 1;	/* hammer inums are unique for all time */
	vap->va_blocksize = HAMMER_BUFSIZE;
	if (ip->ino_data.size >= HAMMER_XDEMARC) {
		vap->va_bytes = (ip->ino_data.size + HAMMER_XBUFMASK64) &
				~HAMMER_XBUFMASK64;
	} else if (ip->ino_data.size > HAMMER_BUFSIZE / 2) {
		vap->va_bytes = (ip->ino_data.size + HAMMER_BUFMASK64) &
				~HAMMER_BUFMASK64;
	} else {
		vap->va_bytes = (ip->ino_data.size + 15) & ~15;
	}
	vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type);
	vap->va_filerev = 0;	/* XXX */
	/* mtime uniquely identifies any adjustments made to the file XXX */
	vap->va_fsmid = ip->ino_data.mtime;
	vap->va_uid_uuid = ip->ino_data.uid;
	vap->va_gid_uuid = ip->ino_data.gid;
	vap->va_fsid_uuid = ip->hmp->fsid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	switch (ip->ino_data.obj_type) {
	case HAMMER_OBJTYPE_CDEV:
	case HAMMER_OBJTYPE_BDEV:
		vap->va_rmajor = ip->ino_data.rmajor;
		vap->va_rminor = ip->ino_data.rminor;
		break;
	default:
		break;
	}
	return(0);
}

/*
 * hammer_vop_nresolve { nch, dvp, cred }
 *
 * Locate the requested directory entry.
 */
static
int
hammer_vop_nresolve(struct vop_nresolve_args *ap)
{
	struct hammer_transaction trans;
	struct namecache *ncp;
	hammer_inode_t dip;
	hammer_inode_t ip;
	hammer_tid_t asof;
	struct hammer_cursor cursor;
	struct vnode *vp;
	int64_t namekey;
	int error;
	int i;
	int nlen;
	int flags;
	int ispfs;
	int64_t obj_id;
	u_int32_t localization;

	/*
	 * Misc initialization, plus handle as-of name extensions.  Look for
	 * the '@@' extension.  Note that as-of files and directories cannot
	 * be modified.
	 */
	dip = VTOI(ap->a_dvp);
	ncp = ap->a_nch->ncp;
	asof = dip->obj_asof;
	nlen = ncp->nc_nlen;
	flags = dip->flags & HAMMER_INODE_RO;
	ispfs = 0;

	hammer_simple_transaction(&trans, dip->hmp);
	++hammer_stats_file_iopsr;

	for (i = 0; i < nlen; ++i) {
		if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
			asof = hammer_str_to_tid(ncp->nc_name + i + 2,
						 &ispfs, &localization);
			if (asof != HAMMER_MAX_TID)
				flags |= HAMMER_INODE_RO;
			break;
		}
	}
	/* nlen now covers only the portion of the name before any '@@' */
	nlen = i;

	/*
	 * If this is a PFS softlink we dive into the PFS
	 */
	if (ispfs && nlen == 0) {
		ip = hammer_get_inode(&trans, dip, HAMMER_OBJID_ROOT,
				      asof, localization,
				      flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
		goto done;
	}

	/*
	 * If there is no path component the time extension is relative to
	 * dip.
	 */
	if (nlen == 0) {
		ip = hammer_get_inode(&trans, dip, dip->obj_id,
				      asof, dip->obj_localization,
				      flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
		goto done;
	}

	/*
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(ncp->nc_name, nlen);

	error = hammer_init_cursor(&trans, &cursor, &dip->cache[1], dip);
	cursor.key_beg.localization = dip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	obj_id = 0;
	localization = HAMMER_DEF_LOCALIZATION;

	if (error == 0) {
		error = hammer_ip_first(&cursor);
		while (error == 0) {
			error = hammer_ip_resolve_data(&cursor);
			if (error)
				break;
			if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
			    bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
				obj_id = cursor.data->entry.obj_id;
				localization = cursor.data->entry.localization;
				break;
			}
			error = hammer_ip_next(&cursor);
		}
	}
	hammer_done_cursor(&cursor);
	if (error == 0) {
		ip = hammer_get_inode(&trans, dip, obj_id,
				      asof, localization,
				      flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
	} else if (error == ENOENT) {
		/* negative cache entry: the name definitively does not exist */
		cache_setvp(ap->a_nch, NULL);
	}
done:
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Locate the parent directory of a directory vnode.
 *
 * dvp is referenced but not locked.  *vpp must be returned referenced and
 * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
 * at the root, instead it could indicate that the directory we were in was
 * removed.
 *
 * NOTE: as-of sequences are not linked into the directory structure.  If
 * we are at the root with a different asof than the mount point, reload
 * the same directory with the mount point's asof.  I'm not sure what this
 * will do to NFS.  We encode ASOF stamps in NFS file handles so it might not
 * get confused, but it hasn't been tested.
 */
static
int
hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *ip;
	int64_t parent_obj_id;
	u_int32_t parent_obj_localization;
	hammer_tid_t asof;
	int error;

	dip = VTOI(ap->a_dvp);
	asof = dip->obj_asof;

	/*
	 * Who is our parent?  This could be the root of a pseudo-filesystem
	 * whose parent is in another localization domain.
	 */
	parent_obj_id = dip->ino_data.parent_obj_id;
	if (dip->obj_id == HAMMER_OBJID_ROOT)
		parent_obj_localization = dip->ino_data.ext.obj.parent_obj_localization;
	else
		parent_obj_localization = dip->obj_localization;

	if (parent_obj_id == 0) {
		if (dip->obj_id == HAMMER_OBJID_ROOT &&
		    asof != dip->hmp->asof) {
			parent_obj_id = dip->obj_id;
			asof = dip->hmp->asof;
			/* 19 == strlen("0x") + 16 hex digits + NUL */
			*ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
			ksnprintf(*ap->a_fakename, 19, "0x%016llx",
				  dip->obj_asof);
		} else {
			*ap->a_vpp = NULL;
			return ENOENT;
		}
	}

	hammer_simple_transaction(&trans, dip->hmp);
	++hammer_stats_file_iopsr;

	ip = hammer_get_inode(&trans, dip, parent_obj_id,
			      asof, parent_obj_localization,
			      dip->flags, &error);
	if (ip) {
		error = hammer_get_vnode(ip, ap->a_vpp);
		hammer_rel_inode(ip, 0);
	} else {
		*ap->a_vpp = NULL;
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nlink { nch, dvp, vp, cred }
 *
 * Hard links must stay within one mount and one localization domain
 * (EXDEV otherwise), and both the directory and target inode must be
 * writable.
 */
static
int
hammer_vop_nlink(struct vop_nlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *ip;
	struct nchandle *nch;
	int error;

	if (ap->a_dvp->v_mount != ap->a_vp->v_mount)
		return(EXDEV);

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);
	ip = VTOI(ap->a_vp);

	if (dip->obj_localization != ip->obj_localization)
		return(EXDEV);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);
	++hammer_stats_file_iopsw;

	/*
	 * Add the filesystem object to the directory.  Note that neither
	 * dip nor ip are referenced or locked, but their vnodes are
	 * referenced.  This function will bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip,
					nch->ncp->nc_name, nch->ncp->nc_nlen,
					ip);

	/*
	 * Finish up.
	 */
	if (error == 0) {
		cache_setunresolved(nch);
		cache_setvp(nch, ap->a_vp);
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);
	++hammer_stats_file_iopsw;

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */
	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
				    dip, NULL, &nip);
	if (error) {
		hkprintf("hammer_mkdir error %d\n", error);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}
	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip,
					nch->ncp->nc_name, nch->ncp->nc_nlen,
					nip);
	if (error)
		hkprintf("hammer_mkdir (add) error %d\n", error);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmknod(struct vop_nmknod_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);
	++hammer_stats_file_iopsw;

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 *
	 * If mknod specifies a directory a pseudo-fs is created.
	 */
	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
				    dip, NULL, &nip);
	if (error) {
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip,
					nch->ncp->nc_name, nch->ncp->nc_nlen,
					nip);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_open { vp, mode, cred, fp }
 *
 * Refuse write opens on read-only (e.g. as-of/snapshot) inodes.
 */
static
int
hammer_vop_open(struct vop_open_args *ap)
{
	hammer_inode_t ip;

	++hammer_stats_file_iopsr;
	ip = VTOI(ap->a_vp);

	if ((ap->a_mode & FWRITE) && (ip->flags & HAMMER_INODE_RO))
		return (EROFS);
	return(vop_stdopen(ap));
}

/*
 * hammer_vop_pathconf { vp, name, retval }
 */
static
int
hammer_vop_pathconf(struct vop_pathconf_args *ap)
{
	return EOPNOTSUPP;
}

/*
 * hammer_vop_print { vp }
 */
static
int
hammer_vop_print(struct vop_print_args *ap)
{
	return EOPNOTSUPP;
}

/*
 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
 */
static
int
hammer_vop_readdir(struct vop_readdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	struct uio *uio;
	hammer_base_elm_t base;
	int error;
	int cookie_index;
	int ncookies;
	off_t *cookies;
	off_t saveoff;
	int r;
	int dtype;

++hammer_stats_file_iopsr; 1242 ip = VTOI(ap->a_vp); 1243 uio = ap->a_uio; 1244 saveoff = uio->uio_offset; 1245 1246 if (ap->a_ncookies) { 1247 ncookies = uio->uio_resid / 16 + 1; 1248 if (ncookies > 1024) 1249 ncookies = 1024; 1250 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); 1251 cookie_index = 0; 1252 } else { 1253 ncookies = -1; 1254 cookies = NULL; 1255 cookie_index = 0; 1256 } 1257 1258 hammer_simple_transaction(&trans, ip->hmp); 1259 1260 /* 1261 * Handle artificial entries 1262 */ 1263 error = 0; 1264 if (saveoff == 0) { 1265 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, "."); 1266 if (r) 1267 goto done; 1268 if (cookies) 1269 cookies[cookie_index] = saveoff; 1270 ++saveoff; 1271 ++cookie_index; 1272 if (cookie_index == ncookies) 1273 goto done; 1274 } 1275 if (saveoff == 1) { 1276 if (ip->ino_data.parent_obj_id) { 1277 r = vop_write_dirent(&error, uio, 1278 ip->ino_data.parent_obj_id, 1279 DT_DIR, 2, ".."); 1280 } else { 1281 r = vop_write_dirent(&error, uio, 1282 ip->obj_id, DT_DIR, 2, ".."); 1283 } 1284 if (r) 1285 goto done; 1286 if (cookies) 1287 cookies[cookie_index] = saveoff; 1288 ++saveoff; 1289 ++cookie_index; 1290 if (cookie_index == ncookies) 1291 goto done; 1292 } 1293 1294 /* 1295 * Key range (begin and end inclusive) to scan. Directory keys 1296 * directly translate to a 64 bit 'seek' position. 
1297 */ 1298 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip); 1299 cursor.key_beg.localization = ip->obj_localization + 1300 HAMMER_LOCALIZE_MISC; 1301 cursor.key_beg.obj_id = ip->obj_id; 1302 cursor.key_beg.create_tid = 0; 1303 cursor.key_beg.delete_tid = 0; 1304 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1305 cursor.key_beg.obj_type = 0; 1306 cursor.key_beg.key = saveoff; 1307 1308 cursor.key_end = cursor.key_beg; 1309 cursor.key_end.key = HAMMER_MAX_KEY; 1310 cursor.asof = ip->obj_asof; 1311 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1312 1313 error = hammer_ip_first(&cursor); 1314 1315 while (error == 0) { 1316 error = hammer_ip_resolve_data(&cursor); 1317 if (error) 1318 break; 1319 base = &cursor.leaf->base; 1320 saveoff = base->key; 1321 KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF); 1322 1323 if (base->obj_id != ip->obj_id) 1324 panic("readdir: bad record at %p", cursor.node); 1325 1326 /* 1327 * Convert pseudo-filesystems into softlinks 1328 */ 1329 dtype = hammer_get_dtype(cursor.leaf->base.obj_type); 1330 r = vop_write_dirent( 1331 &error, uio, cursor.data->entry.obj_id, 1332 dtype, 1333 cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF , 1334 (void *)cursor.data->entry.name); 1335 if (r) 1336 break; 1337 ++saveoff; 1338 if (cookies) 1339 cookies[cookie_index] = base->key; 1340 ++cookie_index; 1341 if (cookie_index == ncookies) 1342 break; 1343 error = hammer_ip_next(&cursor); 1344 } 1345 hammer_done_cursor(&cursor); 1346 1347 done: 1348 hammer_done_transaction(&trans); 1349 1350 if (ap->a_eofflag) 1351 *ap->a_eofflag = (error == ENOENT); 1352 uio->uio_offset = saveoff; 1353 if (error && cookie_index == 0) { 1354 if (error == ENOENT) 1355 error = 0; 1356 if (cookies) { 1357 kfree(cookies, M_TEMP); 1358 *ap->a_ncookies = 0; 1359 *ap->a_cookies = NULL; 1360 } 1361 } else { 1362 if (error == ENOENT) 1363 error = 0; 1364 if (cookies) { 1365 *ap->a_ncookies = cookie_index; 1366 *ap->a_cookies = cookies; 1367 } 1368 
} 1369 return(error); 1370 } 1371 1372 /* 1373 * hammer_vop_readlink { vp, uio, cred } 1374 */ 1375 static 1376 int 1377 hammer_vop_readlink(struct vop_readlink_args *ap) 1378 { 1379 struct hammer_transaction trans; 1380 struct hammer_cursor cursor; 1381 struct hammer_inode *ip; 1382 char buf[32]; 1383 u_int32_t localization; 1384 hammer_pseudofs_inmem_t pfsm; 1385 int error; 1386 1387 ip = VTOI(ap->a_vp); 1388 1389 /* 1390 * Shortcut if the symlink data was stuffed into ino_data. 1391 * 1392 * Also expand special "@@PFS%05d" softlinks (expansion only 1393 * occurs for non-historical (current) accesses made from the 1394 * primary filesystem). 1395 */ 1396 if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) { 1397 char *ptr; 1398 int bytes; 1399 1400 ptr = ip->ino_data.ext.symlink; 1401 bytes = (int)ip->ino_data.size; 1402 if (bytes == 10 && 1403 ip->obj_asof == HAMMER_MAX_TID && 1404 ip->obj_localization == 0 && 1405 strncmp(ptr, "@@PFS", 5) == 0) { 1406 hammer_simple_transaction(&trans, ip->hmp); 1407 bcopy(ptr + 5, buf, 5); 1408 buf[5] = 0; 1409 localization = strtoul(buf, NULL, 10) << 16; 1410 pfsm = hammer_load_pseudofs(&trans, localization, 1411 &error); 1412 if (error == 0) { 1413 if (pfsm->pfsd.mirror_flags & 1414 HAMMER_PFSD_SLAVE) { 1415 ksnprintf(buf, sizeof(buf), 1416 "@@0x%016llx:%05d", 1417 pfsm->pfsd.sync_end_tid, 1418 localization >> 16); 1419 } else { 1420 ksnprintf(buf, sizeof(buf), 1421 "@@0x%016llx:%05d", 1422 HAMMER_MAX_TID, 1423 localization >> 16); 1424 } 1425 ptr = buf; 1426 bytes = strlen(buf); 1427 } 1428 if (pfsm) 1429 hammer_rel_pseudofs(trans.hmp, pfsm); 1430 hammer_done_transaction(&trans); 1431 } 1432 error = uiomove(ptr, bytes, ap->a_uio); 1433 return(error); 1434 } 1435 1436 /* 1437 * Long version 1438 */ 1439 hammer_simple_transaction(&trans, ip->hmp); 1440 ++hammer_stats_file_iopsr; 1441 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip); 1442 1443 /* 1444 * Key range (begin and end inclusive) to scan. 
Directory keys 1445 * directly translate to a 64 bit 'seek' position. 1446 */ 1447 cursor.key_beg.localization = ip->obj_localization + 1448 HAMMER_LOCALIZE_MISC; 1449 cursor.key_beg.obj_id = ip->obj_id; 1450 cursor.key_beg.create_tid = 0; 1451 cursor.key_beg.delete_tid = 0; 1452 cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX; 1453 cursor.key_beg.obj_type = 0; 1454 cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK; 1455 cursor.asof = ip->obj_asof; 1456 cursor.flags |= HAMMER_CURSOR_ASOF; 1457 1458 error = hammer_ip_lookup(&cursor); 1459 if (error == 0) { 1460 error = hammer_ip_resolve_data(&cursor); 1461 if (error == 0) { 1462 KKASSERT(cursor.leaf->data_len >= 1463 HAMMER_SYMLINK_NAME_OFF); 1464 error = uiomove(cursor.data->symlink.name, 1465 cursor.leaf->data_len - 1466 HAMMER_SYMLINK_NAME_OFF, 1467 ap->a_uio); 1468 } 1469 } 1470 hammer_done_cursor(&cursor); 1471 hammer_done_transaction(&trans); 1472 return(error); 1473 } 1474 1475 /* 1476 * hammer_vop_nremove { nch, dvp, cred } 1477 */ 1478 static 1479 int 1480 hammer_vop_nremove(struct vop_nremove_args *ap) 1481 { 1482 struct hammer_transaction trans; 1483 struct hammer_inode *dip; 1484 int error; 1485 1486 dip = VTOI(ap->a_dvp); 1487 1488 if (hammer_nohistory(dip) == 0 && 1489 (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) { 1490 return (error); 1491 } 1492 1493 hammer_start_transaction(&trans, dip->hmp); 1494 ++hammer_stats_file_iopsw; 1495 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 0); 1496 hammer_done_transaction(&trans); 1497 1498 return (error); 1499 } 1500 1501 /* 1502 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred } 1503 */ 1504 static 1505 int 1506 hammer_vop_nrename(struct vop_nrename_args *ap) 1507 { 1508 struct hammer_transaction trans; 1509 struct namecache *fncp; 1510 struct namecache *tncp; 1511 struct hammer_inode *fdip; 1512 struct hammer_inode *tdip; 1513 struct hammer_inode *ip; 1514 struct hammer_cursor cursor; 1515 int64_t namekey; 1516 int nlen, 
error; 1517 1518 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) 1519 return(EXDEV); 1520 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount) 1521 return(EXDEV); 1522 1523 fdip = VTOI(ap->a_fdvp); 1524 tdip = VTOI(ap->a_tdvp); 1525 fncp = ap->a_fnch->ncp; 1526 tncp = ap->a_tnch->ncp; 1527 ip = VTOI(fncp->nc_vp); 1528 KKASSERT(ip != NULL); 1529 1530 if (fdip->obj_localization != tdip->obj_localization) 1531 return(EXDEV); 1532 if (fdip->obj_localization != ip->obj_localization) 1533 return(EXDEV); 1534 1535 if (fdip->flags & HAMMER_INODE_RO) 1536 return (EROFS); 1537 if (tdip->flags & HAMMER_INODE_RO) 1538 return (EROFS); 1539 if (ip->flags & HAMMER_INODE_RO) 1540 return (EROFS); 1541 if ((error = hammer_checkspace(fdip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 1542 return (error); 1543 1544 hammer_start_transaction(&trans, fdip->hmp); 1545 ++hammer_stats_file_iopsw; 1546 1547 /* 1548 * Remove tncp from the target directory and then link ip as 1549 * tncp. XXX pass trans to dounlink 1550 * 1551 * Force the inode sync-time to match the transaction so it is 1552 * in-sync with the creation of the target directory entry. 1553 */ 1554 error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, 1555 ap->a_cred, 0, -1); 1556 if (error == 0 || error == ENOENT) { 1557 error = hammer_ip_add_directory(&trans, tdip, 1558 tncp->nc_name, tncp->nc_nlen, 1559 ip); 1560 if (error == 0) { 1561 ip->ino_data.parent_obj_id = tdip->obj_id; 1562 hammer_modify_inode(ip, HAMMER_INODE_DDIRTY); 1563 } 1564 } 1565 if (error) 1566 goto failed; /* XXX */ 1567 1568 /* 1569 * Locate the record in the originating directory and remove it. 1570 * 1571 * Calculate the namekey and setup the key range for the scan. This 1572 * works kinda like a chained hash table where the lower 32 bits 1573 * of the namekey synthesize the chain. 1574 * 1575 * The key range is inclusive of both key_beg and key_end. 
1576 */ 1577 namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen); 1578 retry: 1579 hammer_init_cursor(&trans, &cursor, &fdip->cache[1], fdip); 1580 cursor.key_beg.localization = fdip->obj_localization + 1581 HAMMER_LOCALIZE_MISC; 1582 cursor.key_beg.obj_id = fdip->obj_id; 1583 cursor.key_beg.key = namekey; 1584 cursor.key_beg.create_tid = 0; 1585 cursor.key_beg.delete_tid = 0; 1586 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY; 1587 cursor.key_beg.obj_type = 0; 1588 1589 cursor.key_end = cursor.key_beg; 1590 cursor.key_end.key |= 0xFFFFFFFFULL; 1591 cursor.asof = fdip->obj_asof; 1592 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; 1593 1594 /* 1595 * Scan all matching records (the chain), locate the one matching 1596 * the requested path component. 1597 * 1598 * The hammer_ip_*() functions merge in-memory records with on-disk 1599 * records for the purposes of the search. 1600 */ 1601 error = hammer_ip_first(&cursor); 1602 while (error == 0) { 1603 if (hammer_ip_resolve_data(&cursor) != 0) 1604 break; 1605 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF; 1606 KKASSERT(nlen > 0); 1607 if (fncp->nc_nlen == nlen && 1608 bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) { 1609 break; 1610 } 1611 error = hammer_ip_next(&cursor); 1612 } 1613 1614 /* 1615 * If all is ok we have to get the inode so we can adjust nlinks. 1616 * 1617 * WARNING: hammer_ip_del_directory() may have to terminate the 1618 * cursor to avoid a recursion. It's ok to call hammer_done_cursor() 1619 * twice. 1620 */ 1621 if (error == 0) 1622 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip); 1623 1624 /* 1625 * XXX A deadlock here will break rename's atomicy for the purposes 1626 * of crash recovery. 1627 */ 1628 if (error == EDEADLK) { 1629 hammer_done_cursor(&cursor); 1630 goto retry; 1631 } 1632 1633 /* 1634 * Cleanup and tell the kernel that the rename succeeded. 
1635 */ 1636 hammer_done_cursor(&cursor); 1637 if (error == 0) 1638 cache_rename(ap->a_fnch, ap->a_tnch); 1639 1640 failed: 1641 hammer_done_transaction(&trans); 1642 return (error); 1643 } 1644 1645 /* 1646 * hammer_vop_nrmdir { nch, dvp, cred } 1647 */ 1648 static 1649 int 1650 hammer_vop_nrmdir(struct vop_nrmdir_args *ap) 1651 { 1652 struct hammer_transaction trans; 1653 struct hammer_inode *dip; 1654 int error; 1655 1656 dip = VTOI(ap->a_dvp); 1657 1658 if (hammer_nohistory(dip) == 0 && 1659 (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) { 1660 return (error); 1661 } 1662 1663 hammer_start_transaction(&trans, dip->hmp); 1664 ++hammer_stats_file_iopsw; 1665 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 1); 1666 hammer_done_transaction(&trans); 1667 1668 return (error); 1669 } 1670 1671 /* 1672 * hammer_vop_setattr { vp, vap, cred } 1673 */ 1674 static 1675 int 1676 hammer_vop_setattr(struct vop_setattr_args *ap) 1677 { 1678 struct hammer_transaction trans; 1679 struct vattr *vap; 1680 struct hammer_inode *ip; 1681 int modflags; 1682 int error; 1683 int truncating; 1684 int blksize; 1685 int64_t aligned_size; 1686 u_int32_t flags; 1687 1688 vap = ap->a_vap; 1689 ip = ap->a_vp->v_data; 1690 modflags = 0; 1691 1692 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) 1693 return(EROFS); 1694 if (ip->flags & HAMMER_INODE_RO) 1695 return (EROFS); 1696 if (hammer_nohistory(ip) == 0 && 1697 (error = hammer_checkspace(ip->hmp, HAMMER_CHKSPC_REMOVE)) != 0) { 1698 return (error); 1699 } 1700 1701 hammer_start_transaction(&trans, ip->hmp); 1702 ++hammer_stats_file_iopsw; 1703 error = 0; 1704 1705 if (vap->va_flags != VNOVAL) { 1706 flags = ip->ino_data.uflags; 1707 error = vop_helper_setattr_flags(&flags, vap->va_flags, 1708 hammer_to_unix_xid(&ip->ino_data.uid), 1709 ap->a_cred); 1710 if (error == 0) { 1711 if (ip->ino_data.uflags != flags) { 1712 ip->ino_data.uflags = flags; 1713 modflags |= HAMMER_INODE_DDIRTY; 1714 } 1715 if 
(ip->ino_data.uflags & (IMMUTABLE | APPEND)) { 1716 error = 0; 1717 goto done; 1718 } 1719 } 1720 goto done; 1721 } 1722 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) { 1723 error = EPERM; 1724 goto done; 1725 } 1726 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 1727 mode_t cur_mode = ip->ino_data.mode; 1728 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid); 1729 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid); 1730 uuid_t uuid_uid; 1731 uuid_t uuid_gid; 1732 1733 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid, 1734 ap->a_cred, 1735 &cur_uid, &cur_gid, &cur_mode); 1736 if (error == 0) { 1737 hammer_guid_to_uuid(&uuid_uid, cur_uid); 1738 hammer_guid_to_uuid(&uuid_gid, cur_gid); 1739 if (bcmp(&uuid_uid, &ip->ino_data.uid, 1740 sizeof(uuid_uid)) || 1741 bcmp(&uuid_gid, &ip->ino_data.gid, 1742 sizeof(uuid_gid)) || 1743 ip->ino_data.mode != cur_mode 1744 ) { 1745 ip->ino_data.uid = uuid_uid; 1746 ip->ino_data.gid = uuid_gid; 1747 ip->ino_data.mode = cur_mode; 1748 } 1749 modflags |= HAMMER_INODE_DDIRTY; 1750 } 1751 } 1752 while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) { 1753 switch(ap->a_vp->v_type) { 1754 case VREG: 1755 if (vap->va_size == ip->ino_data.size) 1756 break; 1757 /* 1758 * XXX break atomicy, we can deadlock the backend 1759 * if we do not release the lock. Probably not a 1760 * big deal here. 1761 */ 1762 blksize = hammer_blocksize(vap->va_size); 1763 if (vap->va_size < ip->ino_data.size) { 1764 vtruncbuf(ap->a_vp, vap->va_size, blksize); 1765 truncating = 1; 1766 } else { 1767 vnode_pager_setsize(ap->a_vp, vap->va_size); 1768 truncating = 0; 1769 } 1770 ip->ino_data.size = vap->va_size; 1771 modflags |= HAMMER_INODE_DDIRTY; 1772 1773 /* 1774 * on-media truncation is cached in the inode until 1775 * the inode is synchronized. 
1776 */ 1777 if (truncating) { 1778 hammer_ip_frontend_trunc(ip, vap->va_size); 1779 #ifdef DEBUG_TRUNCATE 1780 if (HammerTruncIp == NULL) 1781 HammerTruncIp = ip; 1782 #endif 1783 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) { 1784 ip->flags |= HAMMER_INODE_TRUNCATED; 1785 ip->trunc_off = vap->va_size; 1786 #ifdef DEBUG_TRUNCATE 1787 if (ip == HammerTruncIp) 1788 kprintf("truncate1 %016llx\n", ip->trunc_off); 1789 #endif 1790 } else if (ip->trunc_off > vap->va_size) { 1791 ip->trunc_off = vap->va_size; 1792 #ifdef DEBUG_TRUNCATE 1793 if (ip == HammerTruncIp) 1794 kprintf("truncate2 %016llx\n", ip->trunc_off); 1795 #endif 1796 } else { 1797 #ifdef DEBUG_TRUNCATE 1798 if (ip == HammerTruncIp) 1799 kprintf("truncate3 %016llx (ignored)\n", vap->va_size); 1800 #endif 1801 } 1802 } 1803 1804 /* 1805 * If truncating we have to clean out a portion of 1806 * the last block on-disk. We do this in the 1807 * front-end buffer cache. 1808 */ 1809 aligned_size = (vap->va_size + (blksize - 1)) & 1810 ~(int64_t)(blksize - 1); 1811 if (truncating && vap->va_size < aligned_size) { 1812 struct buf *bp; 1813 int offset; 1814 1815 aligned_size -= blksize; 1816 1817 offset = (int)vap->va_size & (blksize - 1); 1818 error = bread(ap->a_vp, aligned_size, 1819 blksize, &bp); 1820 hammer_ip_frontend_trunc(ip, aligned_size); 1821 if (error == 0) { 1822 bzero(bp->b_data + offset, 1823 blksize - offset); 1824 /* must de-cache direct-io offset */ 1825 bp->b_bio2.bio_offset = NOOFFSET; 1826 bdwrite(bp); 1827 } else { 1828 kprintf("ERROR %d\n", error); 1829 brelse(bp); 1830 } 1831 } 1832 break; 1833 case VDATABASE: 1834 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) { 1835 ip->flags |= HAMMER_INODE_TRUNCATED; 1836 ip->trunc_off = vap->va_size; 1837 } else if (ip->trunc_off > vap->va_size) { 1838 ip->trunc_off = vap->va_size; 1839 } 1840 hammer_ip_frontend_trunc(ip, vap->va_size); 1841 ip->ino_data.size = vap->va_size; 1842 modflags |= HAMMER_INODE_DDIRTY; 1843 break; 1844 default: 1845 error = 
EINVAL; 1846 goto done; 1847 } 1848 break; 1849 } 1850 if (vap->va_atime.tv_sec != VNOVAL) { 1851 ip->ino_data.atime = 1852 hammer_timespec_to_time(&vap->va_atime); 1853 modflags |= HAMMER_INODE_ATIME; 1854 } 1855 if (vap->va_mtime.tv_sec != VNOVAL) { 1856 ip->ino_data.mtime = 1857 hammer_timespec_to_time(&vap->va_mtime); 1858 modflags |= HAMMER_INODE_MTIME; 1859 } 1860 if (vap->va_mode != (mode_t)VNOVAL) { 1861 mode_t cur_mode = ip->ino_data.mode; 1862 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid); 1863 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid); 1864 1865 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, 1866 cur_uid, cur_gid, &cur_mode); 1867 if (error == 0 && ip->ino_data.mode != cur_mode) { 1868 ip->ino_data.mode = cur_mode; 1869 modflags |= HAMMER_INODE_DDIRTY; 1870 } 1871 } 1872 done: 1873 if (error == 0) 1874 hammer_modify_inode(ip, modflags); 1875 hammer_done_transaction(&trans); 1876 return (error); 1877 } 1878 1879 /* 1880 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target } 1881 */ 1882 static 1883 int 1884 hammer_vop_nsymlink(struct vop_nsymlink_args *ap) 1885 { 1886 struct hammer_transaction trans; 1887 struct hammer_inode *dip; 1888 struct hammer_inode *nip; 1889 struct nchandle *nch; 1890 hammer_record_t record; 1891 int error; 1892 int bytes; 1893 1894 ap->a_vap->va_type = VLNK; 1895 1896 nch = ap->a_nch; 1897 dip = VTOI(ap->a_dvp); 1898 1899 if (dip->flags & HAMMER_INODE_RO) 1900 return (EROFS); 1901 if ((error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) 1902 return (error); 1903 1904 /* 1905 * Create a transaction to cover the operations we perform. 1906 */ 1907 hammer_start_transaction(&trans, dip->hmp); 1908 ++hammer_stats_file_iopsw; 1909 1910 /* 1911 * Create a new filesystem object of the requested type. The 1912 * returned inode will be referenced but not locked. 
1913 */ 1914 1915 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, 1916 dip, NULL, &nip); 1917 if (error) { 1918 hammer_done_transaction(&trans); 1919 *ap->a_vpp = NULL; 1920 return (error); 1921 } 1922 1923 /* 1924 * Add a record representing the symlink. symlink stores the link 1925 * as pure data, not a string, and is no \0 terminated. 1926 */ 1927 if (error == 0) { 1928 bytes = strlen(ap->a_target); 1929 1930 if (bytes <= HAMMER_INODE_BASESYMLEN) { 1931 bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes); 1932 } else { 1933 record = hammer_alloc_mem_record(nip, bytes); 1934 record->type = HAMMER_MEM_RECORD_GENERAL; 1935 1936 record->leaf.base.localization = nip->obj_localization + 1937 HAMMER_LOCALIZE_MISC; 1938 record->leaf.base.key = HAMMER_FIXKEY_SYMLINK; 1939 record->leaf.base.rec_type = HAMMER_RECTYPE_FIX; 1940 record->leaf.data_len = bytes; 1941 KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0); 1942 bcopy(ap->a_target, record->data->symlink.name, bytes); 1943 error = hammer_ip_add_record(&trans, record); 1944 } 1945 1946 /* 1947 * Set the file size to the length of the link. 1948 */ 1949 if (error == 0) { 1950 nip->ino_data.size = bytes; 1951 hammer_modify_inode(nip, HAMMER_INODE_DDIRTY); 1952 } 1953 } 1954 if (error == 0) 1955 error = hammer_ip_add_directory(&trans, dip, nch->ncp->nc_name, 1956 nch->ncp->nc_nlen, nip); 1957 1958 /* 1959 * Finish up. 
1960 */ 1961 if (error) { 1962 hammer_rel_inode(nip, 0); 1963 *ap->a_vpp = NULL; 1964 } else { 1965 error = hammer_get_vnode(nip, ap->a_vpp); 1966 hammer_rel_inode(nip, 0); 1967 if (error == 0) { 1968 cache_setunresolved(ap->a_nch); 1969 cache_setvp(ap->a_nch, *ap->a_vpp); 1970 } 1971 } 1972 hammer_done_transaction(&trans); 1973 return (error); 1974 } 1975 1976 /* 1977 * hammer_vop_nwhiteout { nch, dvp, cred, flags } 1978 */ 1979 static 1980 int 1981 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap) 1982 { 1983 struct hammer_transaction trans; 1984 struct hammer_inode *dip; 1985 int error; 1986 1987 dip = VTOI(ap->a_dvp); 1988 1989 if (hammer_nohistory(dip) == 0 && 1990 (error = hammer_checkspace(dip->hmp, HAMMER_CHKSPC_CREATE)) != 0) { 1991 return (error); 1992 } 1993 1994 hammer_start_transaction(&trans, dip->hmp); 1995 ++hammer_stats_file_iopsw; 1996 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, 1997 ap->a_cred, ap->a_flags, -1); 1998 hammer_done_transaction(&trans); 1999 2000 return (error); 2001 } 2002 2003 /* 2004 * hammer_vop_ioctl { vp, command, data, fflag, cred } 2005 */ 2006 static 2007 int 2008 hammer_vop_ioctl(struct vop_ioctl_args *ap) 2009 { 2010 struct hammer_inode *ip = ap->a_vp->v_data; 2011 2012 ++hammer_stats_file_iopsr; 2013 return(hammer_ioctl(ip, ap->a_command, ap->a_data, 2014 ap->a_fflag, ap->a_cred)); 2015 } 2016 2017 static 2018 int 2019 hammer_vop_mountctl(struct vop_mountctl_args *ap) 2020 { 2021 struct mount *mp; 2022 int error; 2023 2024 mp = ap->a_head.a_ops->head.vv_mount; 2025 2026 switch(ap->a_op) { 2027 case MOUNTCTL_SET_EXPORT: 2028 if (ap->a_ctllen != sizeof(struct export_args)) 2029 error = EINVAL; 2030 error = hammer_vfs_export(mp, ap->a_op, 2031 (const struct export_args *)ap->a_ctl); 2032 break; 2033 default: 2034 error = journal_mountctl(ap); 2035 break; 2036 } 2037 return(error); 2038 } 2039 2040 /* 2041 * hammer_vop_strategy { vp, bio } 2042 * 2043 * Strategy call, used for regular file read & write only. 
Note that the 2044 * bp may represent a cluster. 2045 * 2046 * To simplify operation and allow better optimizations in the future, 2047 * this code does not make any assumptions with regards to buffer alignment 2048 * or size. 2049 */ 2050 static 2051 int 2052 hammer_vop_strategy(struct vop_strategy_args *ap) 2053 { 2054 struct buf *bp; 2055 int error; 2056 2057 bp = ap->a_bio->bio_buf; 2058 2059 switch(bp->b_cmd) { 2060 case BUF_CMD_READ: 2061 error = hammer_vop_strategy_read(ap); 2062 break; 2063 case BUF_CMD_WRITE: 2064 error = hammer_vop_strategy_write(ap); 2065 break; 2066 default: 2067 bp->b_error = error = EINVAL; 2068 bp->b_flags |= B_ERROR; 2069 biodone(ap->a_bio); 2070 break; 2071 } 2072 return (error); 2073 } 2074 2075 /* 2076 * Read from a regular file. Iterate the related records and fill in the 2077 * BIO/BUF. Gaps are zero-filled. 2078 * 2079 * The support code in hammer_object.c should be used to deal with mixed 2080 * in-memory and on-disk records. 2081 * 2082 * NOTE: Can be called from the cluster code with an oversized buf. 2083 * 2084 * XXX atime update 2085 */ 2086 static 2087 int 2088 hammer_vop_strategy_read(struct vop_strategy_args *ap) 2089 { 2090 struct hammer_transaction trans; 2091 struct hammer_inode *ip; 2092 struct hammer_cursor cursor; 2093 hammer_base_elm_t base; 2094 hammer_off_t disk_offset; 2095 struct bio *bio; 2096 struct bio *nbio; 2097 struct buf *bp; 2098 int64_t rec_offset; 2099 int64_t ran_end; 2100 int64_t tmp64; 2101 int error; 2102 int boff; 2103 int roff; 2104 int n; 2105 2106 bio = ap->a_bio; 2107 bp = bio->bio_buf; 2108 ip = ap->a_vp->v_data; 2109 2110 /* 2111 * The zone-2 disk offset may have been set by the cluster code via 2112 * a BMAP operation, or else should be NOOFFSET. 2113 * 2114 * Checking the high bits for a match against zone-2 should suffice. 
2115 */ 2116 nbio = push_bio(bio); 2117 if ((nbio->bio_offset & HAMMER_OFF_ZONE_MASK) == 2118 HAMMER_ZONE_LARGE_DATA) { 2119 error = hammer_io_direct_read(ip->hmp, nbio, NULL); 2120 return (error); 2121 } 2122 2123 /* 2124 * Well, that sucked. Do it the hard way. If all the stars are 2125 * aligned we may still be able to issue a direct-read. 2126 */ 2127 hammer_simple_transaction(&trans, ip->hmp); 2128 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip); 2129 2130 /* 2131 * Key range (begin and end inclusive) to scan. Note that the key's 2132 * stored in the actual records represent BASE+LEN, not BASE. The 2133 * first record containing bio_offset will have a key > bio_offset. 2134 */ 2135 cursor.key_beg.localization = ip->obj_localization + 2136 HAMMER_LOCALIZE_MISC; 2137 cursor.key_beg.obj_id = ip->obj_id; 2138 cursor.key_beg.create_tid = 0; 2139 cursor.key_beg.delete_tid = 0; 2140 cursor.key_beg.obj_type = 0; 2141 cursor.key_beg.key = bio->bio_offset + 1; 2142 cursor.asof = ip->obj_asof; 2143 cursor.flags |= HAMMER_CURSOR_ASOF; 2144 2145 cursor.key_end = cursor.key_beg; 2146 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE); 2147 #if 0 2148 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) { 2149 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; 2150 cursor.key_end.rec_type = HAMMER_RECTYPE_DB; 2151 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 2152 } else 2153 #endif 2154 { 2155 ran_end = bio->bio_offset + bp->b_bufsize; 2156 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 2157 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; 2158 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */ 2159 if (tmp64 < ran_end) 2160 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 2161 else 2162 cursor.key_end.key = ran_end + MAXPHYS + 1; 2163 } 2164 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 2165 2166 error = hammer_ip_first(&cursor); 2167 boff = 0; 2168 2169 while (error == 0) { 2170 /* 2171 * Get the base file offset of the record. 
The key for 2172 * data records is (base + bytes) rather then (base). 2173 */ 2174 base = &cursor.leaf->base; 2175 rec_offset = base->key - cursor.leaf->data_len; 2176 2177 /* 2178 * Calculate the gap, if any, and zero-fill it. 2179 * 2180 * n is the offset of the start of the record verses our 2181 * current seek offset in the bio. 2182 */ 2183 n = (int)(rec_offset - (bio->bio_offset + boff)); 2184 if (n > 0) { 2185 if (n > bp->b_bufsize - boff) 2186 n = bp->b_bufsize - boff; 2187 bzero((char *)bp->b_data + boff, n); 2188 boff += n; 2189 n = 0; 2190 } 2191 2192 /* 2193 * Calculate the data offset in the record and the number 2194 * of bytes we can copy. 2195 * 2196 * There are two degenerate cases. First, boff may already 2197 * be at bp->b_bufsize. Secondly, the data offset within 2198 * the record may exceed the record's size. 2199 */ 2200 roff = -n; 2201 rec_offset += roff; 2202 n = cursor.leaf->data_len - roff; 2203 if (n <= 0) { 2204 kprintf("strategy_read: bad n=%d roff=%d\n", n, roff); 2205 n = 0; 2206 } else if (n > bp->b_bufsize - boff) { 2207 n = bp->b_bufsize - boff; 2208 } 2209 2210 /* 2211 * Deal with cached truncations. This cool bit of code 2212 * allows truncate()/ftruncate() to avoid having to sync 2213 * the file. 2214 * 2215 * If the frontend is truncated then all backend records are 2216 * subject to the frontend's truncation. 2217 * 2218 * If the backend is truncated then backend records on-disk 2219 * (but not in-memory) are subject to the backend's 2220 * truncation. In-memory records owned by the backend 2221 * represent data written after the truncation point on the 2222 * backend and must not be truncated. 2223 * 2224 * Truncate operations deal with frontend buffer cache 2225 * buffers and frontend-owned in-memory records synchronously. 
2226 */ 2227 if (ip->flags & HAMMER_INODE_TRUNCATED) { 2228 if (hammer_cursor_ondisk(&cursor) || 2229 cursor.iprec->flush_state == HAMMER_FST_FLUSH) { 2230 if (ip->trunc_off <= rec_offset) 2231 n = 0; 2232 else if (ip->trunc_off < rec_offset + n) 2233 n = (int)(ip->trunc_off - rec_offset); 2234 } 2235 } 2236 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) { 2237 if (hammer_cursor_ondisk(&cursor)) { 2238 if (ip->sync_trunc_off <= rec_offset) 2239 n = 0; 2240 else if (ip->sync_trunc_off < rec_offset + n) 2241 n = (int)(ip->sync_trunc_off - rec_offset); 2242 } 2243 } 2244 2245 /* 2246 * Try to issue a direct read into our bio if possible, 2247 * otherwise resolve the element data into a hammer_buffer 2248 * and copy. 2249 * 2250 * The buffer on-disk should be zerod past any real 2251 * truncation point, but may not be for any synthesized 2252 * truncation point from above. 2253 */ 2254 disk_offset = cursor.leaf->data_offset + roff; 2255 if (boff == 0 && n == bp->b_bufsize && 2256 hammer_cursor_ondisk(&cursor) && 2257 (disk_offset & HAMMER_BUFMASK) == 0) { 2258 KKASSERT((disk_offset & HAMMER_OFF_ZONE_MASK) == 2259 HAMMER_ZONE_LARGE_DATA); 2260 nbio->bio_offset = disk_offset; 2261 error = hammer_io_direct_read(trans.hmp, nbio, 2262 cursor.leaf); 2263 goto done; 2264 } else if (n) { 2265 error = hammer_ip_resolve_data(&cursor); 2266 if (error == 0) { 2267 bcopy((char *)cursor.data + roff, 2268 (char *)bp->b_data + boff, n); 2269 } 2270 } 2271 if (error) 2272 break; 2273 2274 /* 2275 * Iterate until we have filled the request. 
2276 */ 2277 boff += n; 2278 if (boff == bp->b_bufsize) 2279 break; 2280 error = hammer_ip_next(&cursor); 2281 } 2282 2283 /* 2284 * There may have been a gap after the last record 2285 */ 2286 if (error == ENOENT) 2287 error = 0; 2288 if (error == 0 && boff != bp->b_bufsize) { 2289 KKASSERT(boff < bp->b_bufsize); 2290 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff); 2291 /* boff = bp->b_bufsize; */ 2292 } 2293 bp->b_resid = 0; 2294 bp->b_error = error; 2295 if (error) 2296 bp->b_flags |= B_ERROR; 2297 biodone(ap->a_bio); 2298 2299 done: 2300 if (cursor.node) 2301 hammer_cache_node(&ip->cache[1], cursor.node); 2302 hammer_done_cursor(&cursor); 2303 hammer_done_transaction(&trans); 2304 return(error); 2305 } 2306 2307 /* 2308 * BMAP operation - used to support cluster_read() only. 2309 * 2310 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb) 2311 * 2312 * This routine may return EOPNOTSUPP if the opration is not supported for 2313 * the specified offset. The contents of the pointer arguments do not 2314 * need to be initialized in that case. 2315 * 2316 * If a disk address is available and properly aligned return 0 with 2317 * *doffsetp set to the zone-2 address, and *runp / *runb set appropriately 2318 * to the run-length relative to that offset. Callers may assume that 2319 * *doffsetp is valid if 0 is returned, even if *runp is not sufficiently 2320 * large, so return EOPNOTSUPP if it is not sufficiently large. 
 */
static
int
hammer_vop_bmap(struct vop_bmap_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct hammer_cursor cursor;
	hammer_base_elm_t base;
	int64_t rec_offset;		/* file offset of current record */
	int64_t ran_end;		/* forward scan limit (file offset) */
	int64_t tmp64;
	int64_t base_offset;		/* file offset of contiguous run start */
	int64_t base_disk_offset;	/* disk offset of contiguous run start */
	int64_t last_offset;		/* file offset just past end of run */
	hammer_off_t last_disk_offset;	/* disk offset just past end of run */
	hammer_off_t disk_offset;
	int rec_len;
	int error;
	int blksize;

	++hammer_stats_file_iopsr;
	ip = ap->a_vp->v_data;

	/*
	 * We can only BMAP regular files.  We can't BMAP database files,
	 * directories, etc.
	 */
	if (ip->ino_data.obj_type != HAMMER_OBJTYPE_REGFILE)
		return(EOPNOTSUPP);

	/*
	 * bmap is typically called with runp/runb both NULL when used
	 * for writing.  We do not support BMAP for writing atm.
	 */
	if (ap->a_cmd != BUF_CMD_READ)
		return(EOPNOTSUPP);

	/*
	 * Scan the B-Tree to acquire blockmap addresses, then translate
	 * to raw addresses.
	 */
	hammer_simple_transaction(&trans, ip->hmp);
#if 0
	kprintf("bmap_beg %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
#endif
	hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);

	/*
	 * Key range (begin and end inclusive) to scan.  Note that the key's
	 * stored in the actual records represent BASE+LEN, not BASE.  The
	 * first record containing bio_offset will have a key > bio_offset.
	 *
	 * If a backwards run (a_runb) was requested the scan is started up
	 * to MAXPHYS bytes before the requested offset so preceding
	 * contiguous records can be accumulated into the run.
	 */
	cursor.key_beg.localization = ip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	if (ap->a_runb)
		cursor.key_beg.key = ap->a_loffset - MAXPHYS + 1;
	else
		cursor.key_beg.key = ap->a_loffset + 1;
	if (cursor.key_beg.key < 0)
		cursor.key_beg.key = 0;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	cursor.key_end = cursor.key_beg;
	KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);

	ran_end = ap->a_loffset + MAXPHYS;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
	cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
	tmp64 = ran_end + MAXPHYS + 1;	/* work-around GCC-4 bug */
	if (tmp64 < ran_end)
		cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
	else
		cursor.key_end.key = ran_end + MAXPHYS + 1;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_ip_first(&cursor);
	base_offset = last_offset = 0;
	base_disk_offset = last_disk_offset = 0;

	while (error == 0) {
		/*
		 * Get the base file offset of the record.  The key for
		 * data records is (base + bytes) rather than (base).
		 *
		 * NOTE: rec_offset + rec_len may exceed the end-of-file.
		 * The extra bytes should be zero on-disk and the BMAP op
		 * should still be ok.
		 */
		base = &cursor.leaf->base;
		rec_offset = base->key - cursor.leaf->data_len;
		rec_len = cursor.leaf->data_len;

		/*
		 * Incorporate any cached truncation.
		 *
		 * NOTE: Modifications to rec_len based on synthesized
		 * truncation points remove the guarantee that any extended
		 * data on disk is zero (since the truncations may not have
		 * taken place on-media yet).
		 */
		if (ip->flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor) ||
			    cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
				if (ip->trunc_off <= rec_offset)
					rec_len = 0;
				else if (ip->trunc_off < rec_offset + rec_len)
					rec_len = (int)(ip->trunc_off - rec_offset);
			}
		}
		if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
			if (hammer_cursor_ondisk(&cursor)) {
				if (ip->sync_trunc_off <= rec_offset)
					rec_len = 0;
				else if (ip->sync_trunc_off < rec_offset + rec_len)
					rec_len = (int)(ip->sync_trunc_off - rec_offset);
			}
		}

		/*
		 * Accumulate information.  If we have hit a discontiguous
		 * block reset base_offset unless we are already beyond the
		 * requested offset.  If we are, that's it, we stop.
		 *
		 * NOTE(review): error is always 0 here (loop condition),
		 * so the break below appears to be dead code.
		 */
		if (error)
			break;
		if (hammer_cursor_ondisk(&cursor)) {
			disk_offset = cursor.leaf->data_offset;
			if (rec_offset != last_offset ||
			    disk_offset != last_disk_offset) {
				if (rec_offset > ap->a_loffset)
					break;
				base_offset = rec_offset;
				base_disk_offset = disk_offset;
			}
			last_offset = rec_offset + rec_len;
			last_disk_offset = disk_offset + rec_len;
		}
		error = hammer_ip_next(&cursor);
	}

#if 0
	kprintf("BMAP %016llx: %016llx - %016llx\n",
		ap->a_loffset, base_offset, last_offset);
	kprintf("BMAP %16s: %016llx - %016llx\n",
		"", base_disk_offset, last_disk_offset);
#endif

	if (cursor.node) {
		hammer_cache_node(&ip->cache[1], cursor.node);
#if 0
		kprintf("bmap_end2 %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
#endif
	}
	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);

	/*
	 * If we couldn't find any records or the records we did find were
	 * all behind the requested offset, return failure.  A forward
	 * truncation can leave a hole w/ no on-disk records.
	 */
	if (last_offset == 0 || last_offset < ap->a_loffset)
		return (EOPNOTSUPP);

	/*
	 * Figure out the block size at the requested offset and adjust
	 * our limits so the cluster_read() does not create inappropriately
	 * sized buffer cache buffers.
	 */
	blksize = hammer_blocksize(ap->a_loffset);
	if (hammer_blocksize(base_offset) != blksize) {
		base_offset = hammer_blockdemarc(base_offset, ap->a_loffset);
	}
	if (last_offset != ap->a_loffset &&
	    hammer_blocksize(last_offset - 1) != blksize) {
		last_offset = hammer_blockdemarc(ap->a_loffset,
						 last_offset - 1);
	}

	/*
	 * Returning EOPNOTSUPP simply prevents the direct-IO optimization
	 * from occurring.
	 */
	disk_offset = base_disk_offset + (ap->a_loffset - base_offset);

	if ((disk_offset & HAMMER_OFF_ZONE_MASK) != HAMMER_ZONE_LARGE_DATA) {
		/*
		 * Only large-data zones can be direct-IOd
		 */
		error = EOPNOTSUPP;
	} else if ((disk_offset & HAMMER_BUFMASK) ||
		   (last_offset - ap->a_loffset) < blksize) {
		/*
		 * doffsetp is not aligned or the forward run size does
		 * not cover a whole buffer, disallow the direct I/O.
		 */
		error = EOPNOTSUPP;
	} else {
		/*
		 * We're good.
		 */
		*ap->a_doffsetp = disk_offset;
		if (ap->a_runb) {
			*ap->a_runb = ap->a_loffset - base_offset;
			KKASSERT(*ap->a_runb >= 0);
		}
		if (ap->a_runp) {
			*ap->a_runp = last_offset - ap->a_loffset;
			KKASSERT(*ap->a_runp >= 0);
		}
		error = 0;
	}
	return(error);
}

/*
 * Write to a regular file.  Because this is a strategy call the OS is
 * trying to actually get data onto the media.
 */
static
int
hammer_vop_strategy_write(struct vop_strategy_args *ap)
{
	hammer_record_t record;
	hammer_mount_t hmp;
	hammer_inode_t ip;
	struct bio *bio;
	struct buf *bp;
	int blksize;
	int bytes;
	int error;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = ap->a_vp->v_data;
	hmp = ip->hmp;

	blksize = hammer_blocksize(bio->bio_offset);
	KKASSERT(bp->b_bufsize == blksize);

	/*
	 * Fail writes to read-only inodes immediately.
	 */
	if (ip->flags & HAMMER_INODE_RO) {
		bp->b_error = EROFS;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
		return(EROFS);
	}

	/*
	 * Interlock with inode destruction (no in-kernel or directory
	 * topology visibility).  If we queue new IO while trying to
	 * destroy the inode we can deadlock the vtrunc call in
	 * hammer_inode_unloadable_check().
	 */
	if (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) {
		bp->b_resid = 0;
		biodone(ap->a_bio);
		return(0);
	}

	/*
	 * Reserve space and issue a direct-write from the front-end.
	 * NOTE: The direct_io code will hammer_bread/bcopy smaller
	 * allocations.
	 *
	 * An in-memory record will be installed to reference the storage
	 * until the flusher can get to it.
	 *
	 * Since we own the high level bio the front-end will not try to
	 * do a direct-read until the write completes.
	 *
	 * NOTE: The only time we do not reserve a full-sized buffers
	 * worth of data is if the file is small.  We do not try to
	 * allocate a fragment (from the small-data zone) at the end of
	 * an otherwise large file as this can lead to wildly separated
	 * data.
	 */
	KKASSERT((bio->bio_offset & HAMMER_BUFMASK) == 0);
	KKASSERT(bio->bio_offset < ip->ino_data.size);
	if (bio->bio_offset || ip->ino_data.size > HAMMER_BUFSIZE / 2)
		bytes = bp->b_bufsize;
	else
		bytes = ((int)ip->ino_data.size + 15) & ~15;

	record = hammer_ip_add_bulk(ip, bio->bio_offset, bp->b_data,
				    bytes, &error);
	if (record) {
		hammer_io_direct_write(hmp, record, bio);
		/*
		 * Kick off a flush if this inode has accumulated in-memory
		 * records and the mount-wide reservation exceeds the
		 * configured limit.
		 */
		if (ip->rsv_recs > 1 && hmp->rsv_recs > hammer_limit_recs)
			hammer_flush_inode(ip, 0);
	} else {
		/* reservation failed; complete the bio with the error */
		bp->b_bio2.bio_offset = NOOFFSET;
		bp->b_error = error;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
	}
	return(error);
}

/*
 * dounlink - disconnect a directory entry
 *
 * Common backend for the nremove and nrmdir ops.  'isdir' selects
 * type validation of the target inode: > 0 requires a directory,
 * 0 forbids one, < 0 skips the check.  'cred' and 'flags' are
 * currently unused.
 *
 * XXX whiteout support not really in yet
 */
static int
hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
		struct vnode *dvp, struct ucred *cred,
		int flags, int isdir)
{
	struct namecache *ncp;
	hammer_inode_t dip;
	hammer_inode_t ip;
	struct hammer_cursor cursor;
	int64_t namekey;
	int nlen, error;

	/*
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	dip = VTOI(dvp);
	ncp = nch->ncp;

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
retry:
	hammer_init_cursor(trans, &cursor, &dip->cache[1], dip);
	cursor.key_beg.localization = dip->obj_localization +
				      HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;	/* scan the whole hash chain */
	cursor.asof = dip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.  info->last_error contains the
	 * error code on search termination and could be 0, ENOENT, or
	 * something else.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	error = hammer_ip_first(&cursor);

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
		KKASSERT(nlen > 0);
		if (ncp->nc_nlen == nlen &&
		    bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
			break;
		}
		error = hammer_ip_next(&cursor);
	}

	/*
	 * If all is ok we have to get the inode so we can adjust nlinks.
	 * To avoid a deadlock with the flusher we must release the inode
	 * lock on the directory when acquiring the inode for the entry.
	 *
	 * If the target is a directory, it must be empty.
	 */
	if (error == 0) {
		hammer_unlock(&cursor.ip->lock);
		ip = hammer_get_inode(trans, dip, cursor.data->entry.obj_id,
				      dip->hmp->asof,
				      cursor.data->entry.localization,
				      0, &error);
		hammer_lock_sh(&cursor.ip->lock);
		if (error == ENOENT) {
			kprintf("obj_id %016llx\n", cursor.data->entry.obj_id);
			Debugger("ENOENT unlinking object that should exist");
		}

		/*
		 * If isdir >= 0 we validate that the entry is or is not a
		 * directory.  If isdir < 0 we don't care.
		 */
		if (error == 0 && isdir >= 0) {
			if (isdir &&
			    ip->ino_data.obj_type != HAMMER_OBJTYPE_DIRECTORY) {
				error = ENOTDIR;
			} else if (isdir == 0 &&
			    ip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
				error = EISDIR;
			}
		}

		/*
		 * If we are trying to remove a directory the directory must
		 * be empty.
		 *
		 * WARNING: hammer_ip_check_directory_empty() may have to
		 * terminate the cursor to avoid a deadlock.  It is ok to
		 * call hammer_done_cursor() twice.
		 */
		if (error == 0 && ip->ino_data.obj_type ==
				  HAMMER_OBJTYPE_DIRECTORY) {
			error = hammer_ip_check_directory_empty(trans, ip);
		}

		/*
		 * Delete the directory entry.
		 *
		 * WARNING: hammer_ip_del_directory() may have to terminate
		 * the cursor to avoid a deadlock.  It is ok to call
		 * hammer_done_cursor() twice.
		 */
		if (error == 0) {
			error = hammer_ip_del_directory(trans, &cursor,
							dip, ip);
		}
		hammer_done_cursor(&cursor);
		if (error == 0) {
			/*
			 * Invalidate the namecache entry and any vnode
			 * associated with the removed inode.
			 */
			cache_setunresolved(nch);
			cache_setvp(nch, NULL);
			/* XXX locking */
			if (ip->vp)
				cache_inval_vp(ip->vp, CINV_DESTROY);
		}
		if (ip)
			hammer_rel_inode(ip, 0);
	} else {
		hammer_done_cursor(&cursor);
	}
	hammer_inode_waitreclaims(dip->hmp);
	/*
	 * A deadlock during the scan/delete forces a complete retry with
	 * a freshly initialized cursor.
	 */
	if (error == EDEADLK)
		goto retry;

	return (error);
}

/************************************************************************
 *			    FIFO AND SPECFS OPS				*
 ************************************************************************
 *
 */

/*
 * Close a fifo; chain to the fifofs implementation.
 */
static int
hammer_vop_fifoclose (struct vop_close_args *ap)
{
	/* XXX update itimes */
	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
}

/*
 * Read from a fifo; chain to the fifofs implementation.
 */
static int
hammer_vop_fiforead (struct vop_read_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	/* XXX update access time */
	return (error);
}

/*
 * Write to a fifo; chain to the fifofs implementation.
 */
static int
hammer_vop_fifowrite (struct vop_write_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	/* XXX update access time */
	return (error);
}

/*
 * Close a special file (device node); chain to specfs.
 */
static int
hammer_vop_specclose (struct vop_close_args *ap)
{
	/* XXX update itimes */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

/*
 * Read from a special file (device node); chain to specfs.
 */
static int
hammer_vop_specread (struct vop_read_args *ap)
{
	/* XXX update access time */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

/*
 * Write to a special file (device node); chain to specfs.
 */
static int
hammer_vop_specwrite (struct vop_write_args *ap)
{
	/* XXX update last change time */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}
